eaiovnaovbqoebvqoeavibavo Basename.pm000064400000025762147633754660006656 0ustar00=head1 NAME File::Basename - Parse file paths into directory, filename and suffix. =head1 SYNOPSIS use File::Basename; ($name,$path,$suffix) = fileparse($fullname,@suffixlist); $name = fileparse($fullname,@suffixlist); $basename = basename($fullname,@suffixlist); $dirname = dirname($fullname); =head1 DESCRIPTION These routines allow you to parse file paths into their directory, filename and suffix. B: C and C emulate the behaviours, and quirks, of the shell and C functions of the same name. See each function's documentation for details. If your concern is just parsing paths it is safer to use L's C and C methods. It is guaranteed that # Where $path_separator is / for Unix, \ for Windows, etc... dirname($path) . $path_separator . basename($path); is equivalent to the original path for all systems but VMS. =cut package File::Basename; # File::Basename is used during the Perl build, when the re extension may # not be available, but we only actually need it if running under tainting. BEGIN { if (${^TAINT}) { require re; re->import('taint'); } } use strict; use 5.006; use warnings; our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase); require Exporter; @ISA = qw(Exporter); @EXPORT = qw(fileparse fileparse_set_fstype basename dirname); $VERSION = "2.84"; fileparse_set_fstype($^O); =over 4 =item C X my($filename, $directories, $suffix) = fileparse($path); my($filename, $directories, $suffix) = fileparse($path, @suffixes); my $filename = fileparse($path, @suffixes); The C routine divides a file path into its $directories, $filename and (optionally) the filename $suffix. $directories contains everything up to and including the last directory separator in the $path including the volume (if applicable). The remainder of the $path is the $filename. 
# On Unix returns ("baz", "/foo/bar/", "") fileparse("/foo/bar/baz"); # On Windows returns ("baz", 'C:\foo\bar\', "") fileparse('C:\foo\bar\baz'); # On Unix returns ("", "/foo/bar/baz/", "") fileparse("/foo/bar/baz/"); If @suffixes are given each element is a pattern (either a string or a C) matched against the end of the $filename. The matching portion is removed and becomes the $suffix. # On Unix returns ("baz", "/foo/bar/", ".txt") fileparse("/foo/bar/baz.txt", qr/\.[^.]*/); If type is non-Unix (see L) then the pattern matching for suffix removal is performed case-insensitively, since those systems are not case-sensitive when opening existing files. You are guaranteed that C<$directories . $filename . $suffix> will denote the same location as the original $path.

=cut

# fileparse($fullname, @suffices)
#
# Splits $fullname into a directory part and a file-name part using the
# path syntax selected by $Fileparse_fstype, then removes from the end of
# the file name every pattern in @suffices that matches there (in the
# order given), accumulating the removed text as the suffix.
#
# Returns ($basename, $dirpath, $suffix) in list context and just
# $basename in scalar context.  Croaks when $fullname is undefined.
sub fileparse {
  my($fullname,@suffices) = @_;

  unless (defined $fullname) {
      require Carp;
      Carp::croak("fileparse(): need a valid pathname");
  }

  my $orig_type = '';
  my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);

  # Zero-length substring: carries the taint flag of $fullname, no data.
  my($taint) = substr($fullname,0,0);  # Is $fullname tainted?

  if ($type eq "VMS" and $fullname =~ m{/} ) {
    # We're doing Unix emulation
    $orig_type = $type;
    $type = 'Unix';
  }

  my($dirpath, $basename);

  if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) {
    # Directory part ends at the last ':', '\' or '/'.
    ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
    # No trailing separator (empty, or a bare drive spec): append '.\'.
    $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
  }
  elsif ($type eq "OS2") {
    ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
    $dirpath = './' unless $dirpath;    # Can't be 0
    $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
  }
  elsif ($type eq "MacOS") {
    ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
    $dirpath = ':' unless $dirpath;
  }
  elsif ($type eq "AmigaOS") {
    ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
    $dirpath = './' unless $dirpath;
  }
  elsif ($type eq 'VMS' ) {
    ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
    $dirpath ||= '';  # should always be defined
  }
  else { # Default to Unix semantics.
    ($dirpath,$basename) = ($fullname =~ m{^(.*/)?(.*)}s);
    if ($orig_type eq 'VMS' and $fullname =~ m{^(/[^/]+/000000(/|$))(.*)}) {
      # dev:[000000] is top of VMS tree, similar to Unix '/'
      # so strip it off and treat the rest as "normal"
      my $devspec   = $1;
      my $remainder = $3;
      ($dirpath,$basename) = ($remainder =~ m{^(.*/)?(.*)}s);
      $dirpath ||= '';  # should always be defined
      $dirpath = $devspec.$dirpath;
    }
    $dirpath = './' unless $dirpath;
  }


  my $tail   = '';
  my $suffix = '';
  if (@suffices) {
    foreach $suffix (@suffices) {
      # Anchor the pattern at the end of the name; add (?i) when the
      # selected filesystem type is flagged case-insensitive.
      my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
      if ($basename =~ s/$pat//s) {
        $taint .= substr($suffix,0,0);
        # Later matches sit to the left of earlier ones, so prepend.
        $tail = $1 . $tail;
      }
    }
  }

  # Ensure taint is propagated from the path to its pieces.
  $tail .= $taint;
  wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
            : ($basename .= $taint);
}



=item C<basename>
X<basename> X<filename>

    my $filename = basename($path);
    my $filename = basename($path, @suffixes);

This function is provided for compatibility with the Unix shell command
C<basename(1)>.  It does B<NOT> always return the file name portion of a
path as you might expect.  To be safe, if you want the file name portion of
a path use C<fileparse()>.

C<basename()> returns the last level of a filepath even if the last
level is clearly directory.  In effect, it is acting like C<pop()> for
paths.  This differs from C<fileparse()>'s behaviour.

    # Both return "bar"
    basename("/foo/bar");
    basename("/foo/bar/");

@suffixes work as in C<fileparse()> except all regex metacharacters are
quoted.

    # These two function calls are equivalent.
    my $filename = basename("/foo/bar/baz.txt",  ".txt");
    my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);

Also note that in order to be compatible with the shell command,
C<basename()> does not strip off a suffix if it is identical to the
remaining characters in the filename.

=cut

# basename($path, @suffixes)
#
# Shell-style basename: returns the last component of $path after any
# trailing separators have been removed.  Each entry of @suffixes is
# treated as a literal string (regex metacharacters are quoted) and is
# stripped from the end of the name, unless that would leave nothing.
sub basename {
    my ($path, @suffixes) = @_;

    # From BSD basename(1)
    # The basename utility deletes any prefix ending with the last slash '/'
    # character present in string (after first stripping trailing slashes)
    _strip_trailing_sep($path);

    my ($name, $dir, $tail) =
        fileparse($path, map { quotemeta } @suffixes);

    # From BSD basename(1)
    # The suffix is not stripped if it is identical to the remaining
    # characters in string.
    $name = $tail if length($tail) && !length($name);

    # Ensure that basename '/' == '/'
    $name = $dir unless length $name;

    return $name;
}



=item C<dirname>
X<dirname>

This function is provided for compatibility with the Unix shell
command C<dirname(1)> and has inherited some of its quirks.  In spite of
its name it does B<NOT> always return the directory name as you might
expect.  To be safe, if you want the directory name of a path use
C<fileparse()>.

Only on VMS (where there is no ambiguity between the file and directory
portions of a path) and AmigaOS (possibly due to an implementation quirk in
this module) does C<dirname()> work like C<fileparse($path)>, returning just the
$directories.

    # On VMS and AmigaOS
    my $directories = dirname($path);

When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
which is subtly different from how C<fileparse()> works.  It returns all but
the last level of a file path even if the last level is clearly a directory.
In effect, it is not returning the directory portion but simply the path one
level up acting like C<chop()> for file paths.

Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
its returned path.

    # returns /foo/bar.  fileparse() would return /foo/bar/
    dirname("/foo/bar/baz");

    # also returns /foo/bar despite the fact that baz is clearly a
    # directory.  fileparse() would return /foo/bar/baz/
    dirname("/foo/bar/baz/");

    # returns '.'.  fileparse() would return 'foo/'
    dirname("foo/");

Under VMS, if there is no directory information in the $path, then the
current default device and directory is used.

=cut

# dirname($path)
#
# Shell-style dirname: returns $path with its last level removed, following
# the rules of the currently selected $Fileparse_fstype.
sub dirname {
    my ($path) = @_;

    my $fstype = $Fileparse_fstype;

    # A '/' in a path while in VMS mode means Unix emulation: clear the
    # type for the duration of one recursive call.
    if ($fstype eq 'VMS' && $path =~ m{/}) {
        # Parse as Unix
        local $File::Basename::Fileparse_fstype = '';
        return dirname($path);
    }

    my ($leaf, $dir) = fileparse($path);

    if ($fstype eq 'VMS') {
        $dir ||= $ENV{DEFAULT};
        return $dir;
    }

    if ($fstype eq 'MacOS') {
        if (!length($leaf) && $dir !~ /^[^:]+:\z/) {
            _strip_trailing_sep($dir);
            ($leaf, $dir) = fileparse($dir);
        }
        $dir .= ":" unless $dir =~ /:\z/;
        return $dir;
    }

    if ($fstype eq 'AmigaOS') {
        return $dir if $dir =~ /:\z/;
        chop $dir;
        $dir =~ s{[^:/]+\z}{} unless length($leaf);
        return $dir;
    }

    # MSDOS/DOS/MSWin32/OS2 and everything else share the same steps; the
    # separator differences are handled inside _strip_trailing_sep().
    _strip_trailing_sep($dir);
    unless (length $leaf) {
        ($leaf, $dir) = fileparse($dir);
        _strip_trailing_sep($dir);
    }
    return $dir;
}


# Strip the trailing path separator.
# Works in place on the caller's variable through the $_[0] alias.
sub _strip_trailing_sep  {
    my $fstype    = $Fileparse_fstype;
    my $is_dosish = grep { $fstype eq $_ } qw(MSDOS DOS MSWin32 OS2);

    return $_[0] =~ s/([^:]):\z/$1/s      if $fstype eq 'MacOS';
    return $_[0] =~ s/([^:])[\\\/]*\z/$1/ if $is_dosish;
    return $_[0] =~ s{(.)/*\z}{$1}s;
}


=item C<fileparse_set_fstype>
X<filesystem>

  my $type = fileparse_set_fstype();
  my $previous_type = fileparse_set_fstype($type);

Normally File::Basename will assume a file path type native to your current
operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...).
With this function you can override that assumption.

Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS",
"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility),
"Epoc" and "Unix" (all case-insensitive).  If an unrecognized $type is
given "Unix" will be assumed.

If you've selected VMS syntax, and the file specification you pass to
one of these routines contains a "/", they assume you are using Unix
emulation and apply the Unix syntax rules instead, for that function
call only.

=back

=cut


BEGIN {

my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc);
my @Types = (@Ignore_Case, qw(Unix));

# fileparse_set_fstype([$new_type])
#
# Returns the filesystem type that was in effect on entry.  When an
# argument is given, selects the last entry of @Types that $new_type starts
# with (case-insensitively), falling back to 'Unix', and updates
# $Fileparse_igncase to match.
sub fileparse_set_fstype {
    my $old = $Fileparse_fstype;

    if (@_) {
        my $new_type = shift;

        $Fileparse_fstype = 'Unix';  # default
        foreach my $type (@Types) {
            $Fileparse_fstype = $type if $new_type =~ /^$type/i;
        }

        $Fileparse_igncase =
          (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0;
    }

    return $old;
}

}


1;


=head1 SEE ALSO

L<dirname(1)>, L<basename(1)>, L<File::Spec>
GlobMapper.pm000064400000036564147633754660007175 0ustar00
package File::GlobMapper;

use strict;
use warnings;
use Carp;

our ($CSH_GLOB);

BEGIN
{
    if ($] < 5.006)
    {
        require File::BSDGlob; import File::BSDGlob qw(:glob) ;
        $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
        *globber = \&File::BSDGlob::csh_glob;
    }
    else
    {
        require File::Glob; import File::Glob qw(:glob) ;
        $CSH_GLOB = File::Glob::GLOB_CSH() ;
        #*globber = \&File::Glob::bsd_glob;
        *globber = \&File::Glob::csh_glob;
    }
}

our ($Error);

our ($VERSION, @EXPORT_OK);
$VERSION = '1.000';
@EXPORT_OK = qw( globmap );


our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
$noPreBS = '(?<!\\\)' ; # no preceding backslash
$metachars = '.*?[](){}';
$matchMetaRE = '[' . quotemeta($metachars) . ']';

# Glob metacharacter => regular-expression fragment emitted for it.
%mapping = (
                '*' => '([^/]*)',
                '?' => '([^/])',
                '.' => '\.',
                '[' => '([',
                '(' => '(',
                ')' => ')',
           );

# Metacharacters that bump the WildCount counter when encountered.
%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;

# globmap($inputGlob, $outputGlob)
#
# Builds a File::GlobMapper object for the two globs and returns its list
# of [input, output] filename pairs as an array reference.  Croaks with
# $Error when the object cannot be built.
sub globmap ($$;)
{
    my $inputGlob = shift ;
    my $outputGlob = shift ;

    my $obj = File::GlobMapper->new($inputGlob, $outputGlob, @_)
        or croak "globmap: $Error" ;
    return $obj->getFileMap();
}

# new($inputGlob, $outputGlob [, $flags])
#
# Parses both globs, expands the input glob with globber() and computes the
# input => output pairs.  Returns the object, or undef with $Error set.
sub new
{
    my $class = shift ;
    my $inputGlob = shift ;
    my $outputGlob = shift ;
    # TODO -- flags needs to default to whatever File::Glob does
    my $flags = shift || $CSH_GLOB ;
    #my $flags = shift ;

    # The enclosing < ... > delimiters are optional; remove them if present.
    $inputGlob =~ s/^\s*\<\s*//;
    $inputGlob =~ s/\s*\>\s*$//;

    $outputGlob =~ s/^\s*\<\s*//;
    $outputGlob =~ s/\s*\>\s*$//;

    my %object =
            (   InputGlob   => $inputGlob,
                OutputGlob  => $outputGlob,
                GlobFlags   => $flags,
                Braces      => 0,
                WildCount   => 0,
                Pairs       => [],
                Sigil       => '#',
            );

    my $self = bless \%object, ref($class) || $class ;

    $self->_parseInputGlob()
        or return undef ;

    $self->_parseOutputGlob()
        or return undef ;

    my @inputFiles = globber($self->{InputGlob}, $flags) ;

    if (GLOB_ERROR)
    {
        $Error = $!;
        return undef ;
    }

    #if (whatever)
    {
        my $missing = grep { ! -e $_ } @inputFiles ;

        if ($missing)
        {
            $Error = "$missing input files do not exist";
            return undef ;
        }
    }

    $self->{InputFiles} = \@inputFiles ;

    $self->_getFiles()
        or return undef ;

    return $self;
}

# Record "$string in input fileglob" in $Error and return undef.
sub _retError
{
    my $string = shift ;
    $Error = "$string in input fileglob" ;
    return undef ;
}

# Report an unmatched delimiter through _retError(); returns undef.
sub _unmatched
{
    my $delimeter = shift ;

    _retError("Unmatched $delimeter");
    return undef ;
}

# _parseBit($string)
#
# Translates the text found between { and } into a regex alternation:
# ',' becomes '|', other metacharacters go through %mapping, and literal
# text is quoted.  Returns the regex fragment, or undef with $Error set.
sub _parseBit
{
    my $self = shift ;

    my $string = shift ;

    my $out = '';
    my $depth = 0 ;

    while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
    {
        $out .= quotemeta($1) ;
        $out .= $mapping{$2} if defined $mapping{$2};

        ++ $self->{WildCount} if $wildCount{$2} ;

        if ($2 eq ',')
        {
            return _unmatched "("
                if $depth ;

            $out .= '|';
        }
        elsif ($2 eq '(')
        {
            ++ $depth ;
        }
        elsif ($2 eq ')')
        {
            return _unmatched ")"
                if ! $depth ;

            -- $depth ;
        }
        elsif ($2 eq '[')
        {
            # TODO -- quotemeta & check no '/'
            # TODO -- check for \]  & other \ within the []
            $string =~ s#(.*?\])##
                or return _unmatched "[" ;
            $out .= "$1)" ;
        }
        elsif ($2 eq ']')
        {
            return _unmatched "]" ;
        }
        elsif ($2 eq '{' || $2 eq '}')
        {
            return _retError "Nested {} not allowed" ;
        }
    }

    $out .= quotemeta $string;

    return _unmatched "("
        if $depth ;

    return $out ;
}

# _parseInputGlob()
#
# Converts $self->{InputGlob} into the regular expression stored in
# $self->{InputPattern}, and removes unescaped parentheses from
# $self->{InputGlob} itself.  Returns 1 on success; on a syntax error
# returns a false value with $Error set.
sub _parseInputGlob
{
    my $self = shift ;

    my $string = $self->{InputGlob} ;
    my $inGlob = '';

    # Multiple concatenated *'s don't make sense
    #$string =~ s#\*\*+#*# ;

    # TODO -- Allow space to delimit patterns?
    #my @strings = split /\s+/, $string ;
    #for my $str (@strings)
    my $out = '';
    my $depth = 0 ;

    while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
    {
        $out .= quotemeta($1) ;
        $out .= $mapping{$2} if defined $mapping{$2};
        ++ $self->{WildCount} if $wildCount{$2} ;

        if ($2 eq '(')
        {
            ++ $depth ;
        }
        elsif ($2 eq ')')
        {
            return _unmatched ")"
                if ! $depth ;

            -- $depth ;
        }
        elsif ($2 eq '[')
        {
            # TODO -- quotemeta & check no '/' or '(' or ')'
            # TODO -- check for \]  & other \ within the []
            $string =~ s#(.*?\])##
                or return _unmatched "[";
            $out .= "$1)" ;
        }
        elsif ($2 eq ']')
        {
            return _unmatched "]" ;
        }
        elsif ($2 eq '}')
        {
            return _unmatched "}" ;
        }
        elsif ($2 eq '{')
        {
            # TODO -- check no '/' within the {}
            # TODO -- check for \}  & other \ within the {}

            my $tmp ;
            unless ( $string =~ s/(.*?)$noPreBS\}//)
            {
                return _unmatched "{";
            }
            #$string =~ s#(.*?)\}##;

            #my $alt = join '|',
            #          map { quotemeta $_ }
            #          split "$noPreBS,", $1 ;
            my $alt = $self->_parseBit($1);
            defined $alt or return 0 ;
            $out .= "($alt)" ;

            ++ $self->{Braces} ;
        }
    }

    return _unmatched "("
        if $depth ;

    $out .= quotemeta $string ;


    $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
    $self->{InputPattern} = $out ;

    #print "# INPUT '$self->{InputGlob}' => '$out'\n";

    return 1 ;

}

# _parseOutputGlob()
#
# Turns $self->{OutputGlob} into a double-quoted Perl string expression
# stored in $self->{OutputPattern}: an unescaped "#N" becomes "${N}" and an
# unescaped "*" becomes "${inFile}".  Croaks when a "#N" refers to more
# wildcards than the input glob produced.  Returns 1.
sub _parseOutputGlob
{
    my $self = shift ;

    my $string = $self->{OutputGlob} ;
    my $maxwild = $self->{WildCount};

    if ($self->{GlobFlags} & GLOB_TILDE)
    #if (1)
    {
        $string =~ s{
              ^ ~             # find a leading tilde
              (               # save this in $1
                  [^/]        # a non-slash character
                        *     # repeated 0 or more times (0 means me)
              )
            }{
              $1
                  ? (getpwnam($1))[7]
                  : ( $ENV{HOME} || $ENV{LOGDIR} )
            }ex;

    }

    # max #1 must be == to max no of '*' in input
    while ( $string =~ m/#(\d)/g )
    {
        croak "Max wild is #$maxwild, you tried #$1"
            if $1 > $maxwild ;
    }

    my $noPreBS = '(?<!\\\)' ; # no preceding backslash
    #warn "noPreBS = '$noPreBS'\n";

    #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
    $string =~ s/${noPreBS}#(\d)/\${$1}/g;
    $string =~ s#${noPreBS}\*#\${inFile}#g;
    $string = '"' . $string . '"';

    #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
    $self->{OutputPattern} = $string ;

    return 1 ;
}

# _getFiles()
#
# For each distinct input file that matches InputPattern, evaluates
# OutputPattern to obtain the output name and appends [$inFile, $outFile]
# to $self->{Pairs}.  Returns 1, or undef with $Error set when two input
# files map to the same output file.
sub _getFiles
{
    my $self = shift ;

    my %outInMapping = ();
    my %inFiles = () ;

    foreach my $inFile (@{ $self->{InputFiles} })
    {
        next if $inFiles{$inFile} ++ ;

        my $outFile = $inFile ;

        if ( $inFile =~ m/$self->{InputPattern}/ )
        {
            no warnings 'uninitialized';
            # NOTE(review): string eval of text derived from the caller's
            # output glob; the capture variables set by the match above
            # are interpolated here.  Do not feed untrusted output globs.
            eval "\$outFile = $self->{OutputPattern};" ;

            if (defined $outInMapping{$outFile})
            {
                $Error =  "multiple input files map to one output file";
                return undef ;
            }
            $outInMapping{$outFile} = $inFile;
            push @{ $self->{Pairs} }, [$inFile, $outFile];
        }
    }

    return 1 ;
}

# Returns the array reference of [input, output] filename pairs.
sub getFileMap
{
    my $self = shift ;

    return $self->{Pairs} ;
}

# Returns a hash reference mapping each input filename to its output name.
sub getHash
{
    my $self = shift ;

    return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
}

1;

__END__

=head1 NAME

File::GlobMapper - Extend File Glob to Allow Input and Output Files

=head1 SYNOPSIS

    use File::GlobMapper qw( globmap );

    my $aref = globmap $input => $output
        or die $File::GlobMapper::Error ;

    my $gm = new File::GlobMapper $input => $output
        or die $File::GlobMapper::Error ;


=head1 DESCRIPTION

This module needs Perl5.005 or better.

This module takes the existing C<File::Glob> module as a starting point and
extends it to allow new filenames to be derived from the files matched by
C<File::Glob>.

This can be useful when carrying out batch operations on multiple files that
have both an input filename and output filename and the output file can be
derived from the input filename. Examples of operations where this can be
useful include, file renaming, file copying and file compression.

=head2 Behind The Scenes

To help explain what C<File::GlobMapper> does, consider what code you
would write if you wanted to rename all files in the current directory
that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
current directory

    alpha.tar.gz
    beta.tar.gz
    gamma.tar.gz

and they need to be renamed to this

    alpha.tgz
    beta.tgz
    gamma.tgz

Below is a possible implementation of a script to carry out the rename
(error cases have been omitted)

    foreach my $old ( glob "*.tar.gz" )
    {
        my $new = $old;
        $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;

        rename $old => $new
            or die "Cannot rename '$old' to '$new': $!\n";
    }

Notice that a file glob pattern C<*.tar.gz> was used to match the
C<.tar.gz> files, then a fairly similar regular expression was used in
the substitute to allow the new filename to be created.

Given that the file glob is just a cut-down regular expression and that it
has already done a lot of the hard work in pattern matching the filenames,
wouldn't it be handy to be able to use the patterns in the fileglob to
drive the new filename?

Well, that's I<exactly> what C<File::GlobMapper> does.

Here is the same snippet of code rewritten using C<globmap>

    for my $pair (@{ globmap '<*.tar.gz>' => '<#1.tgz>' })
    {
        my ($from, $to) = @$pair;
        rename $from => $to
            or die "Cannot rename '$from' to '$to': $!\n";
    }

So how does it work?

Behind the scenes the C<globmap> function does a combination of a
file glob to match existing filenames followed by a substitute
to create the new filenames.

Notice how both parameters to C<globmap> are strings that are delimited by <>.
This is done to make them look more like file globs - it is just syntactic
sugar, but it can be handy when you want the strings to be visually
distinctive. The enclosing <> are optional, so you don't have to use them - in
fact the first thing globmap will do is remove these delimiters if they are
present.

The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
Once the enclosing "< ... >" is removed, this is passed (more or less)
unchanged to C<File::Glob> to carry out a file match.

Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
full Perl regular expression, with the additional step of wrapping each
transformed wildcard metacharacter sequence in parentheses.

In this case the input fileglob C<*.tar.gz> will be transformed into
this Perl regular expression

    ([^/]*)\.tar\.gz

Wrapping with parentheses allows the wildcard parts of the Input File
Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
the I<Output File Glob>. This parameter operates just like the replacement
part of a substitute command. The difference is that the C<#1> syntax
is used to reference sub-patterns matched in the input fileglob, rather
than the C<$1> syntax that is used with perl regular expressions. In
this case C<#1> is used to refer to the text matched by the C<*> in the
Input File Glob. This makes it easier to use this module where the
parameters to C<globmap> are typed at the command line.

The final step involves passing each filename matched by the C<*.tar.gz>
file glob through the derived Perl regular expression in turn and
expanding the output fileglob using it.

The end result of all this is a list of pairs of filenames. By default
that is what is returned by C<globmap>. In this example the data structure
returned will look like this

     ( ['alpha.tar.gz' => 'alpha.tgz'],
       ['beta.tar.gz'  => 'beta.tgz' ],
       ['gamma.tar.gz' => 'gamma.tgz']
     )


Each pair is an array reference with two elements - namely the I<from>
filename, that C<File::Glob> has matched, and a I<to> filename that is
derived from the I<from> filename.



=head2 Limitations

C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
solve all filename mapping operations. Under the hood C<File::Glob> (or for
older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
will never have the flexibility of full Perl regular expression.

=head2 Input File Glob

The syntax for an Input FileGlob is identical to C<File::Glob>, except
for the following

=over 5

=item 1.

No nested {}

=item 2.

Whitespace does not delimit fileglobs.

=item 3.

The use of parentheses can be used to capture parts of the input filename.

=item 4.

If an Input glob matches the same file more than once, only the first
will be used.

=back

The syntax

=over 5

=item B<~>

=item B<~user>


=item B<.>

Matches a literal '.'.
Equivalent to the Perl regular expression

    \.

=item B<*>

Matches zero or more characters, except '/'. Equivalent to the Perl
regular expression

    [^/]*

=item B<?>

Matches exactly one character, except '/'. Equivalent to the Perl
regular expression

    [^/]

=item B<\>

Backslash is used, as usual, to escape the next character.

=item B<[]>

Character class.

=item B<{,}>

Alternation

=item B<()>

Capturing parentheses that work just like perl

=back

Any other character is taken literally.

=head2 Output File Glob

The Output File Glob is a normal string, with 2 glob-like features.

The first is the '*' metacharacter. This will be replaced by the complete
filename matched by the input file glob. So

    *.c *.Z

The second is the C<#1> syntax, which refers to the text matched by the
corresponding sub-pattern of the input file glob.

Output FileGlobs take the following forms

=over 5

=item "*"

The "*" character will be replaced with the complete input filename.

=item #1

Patterns of the form /#\d/ will be replaced with the text matched by the
corresponding sub-pattern of the input file glob.

=back

=head2 Returned Data


=head1 EXAMPLES

=head2 A Rename script

Below is a simple "rename" script that uses C<globmap> to determine the
source and destination filenames.

    use File::GlobMapper qw(globmap) ;
    use File::Copy;

    die "rename: Usage rename 'from' 'to'\n"
        unless @ARGV == 2 ;

    my $fromGlob = shift @ARGV;
    my $toGlob   = shift @ARGV;

    my $pairs = globmap($fromGlob, $toGlob)
        or die $File::GlobMapper::Error;

    for my $pair (@$pairs)
    {
        my ($from, $to) = @$pair;
        move $from => $to ;
    }



Here is an example that renames all c files to cpp.

    $ rename '*.c' '#1.cpp'

=head2 A few example globmaps

Below are a few examples of globmaps

To copy all your .c file to a backup directory

    '</my/home/*.c>'    '</my/backup/#1.c>'

If you want to compress all

    '</my/home/*.[ch]>'    '<*.gz>'

To uncompress

    '</my/home/*.[ch].gz>'    '</my/home/#1.#2>'

=head1 SEE ALSO

L<File::Glob|File::Glob>

=head1 AUTHOR

The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2005 Paul Marquess. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
Compare.pm000064400000010354147633754660006520 0ustar00
package File::Compare;

use 5.006;
use strict;
use warnings;
our($VERSION, @ISA, @EXPORT, @EXPORT_OK, $Too_Big);

require Exporter;

$VERSION = '1.1006';
@ISA = qw(Exporter);
@EXPORT = qw(compare);
@EXPORT_OK = qw(cmp compare_text);

# Upper bound on the read buffer that compare() chooses for itself.
$Too_Big = 1024 * 1024 * 2;

# Load Carp only when an error is actually raised.
sub croak {
    require Carp;
    goto &Carp::croak;
}

# compare($from, $to [, $size])
#
# Compares two sources, each a file name or a file handle.
#   $size > 0         buffer size for a binary comparison
#   $size < 0         line-by-line comparison using "ne"
#   $size is a CODE   line-by-line comparison; the sub receives both lines
#                     and returns true when they differ
# Returns 0 when the sources are equal, 1 when they differ and -1 when a
# file could not be opened or closed.  Croaks on a wrong argument count or
# an undefined source.
sub compare {
    croak("Usage: compare( file1, file2 [, buffersize]) ")
      unless(@_ == 2 || @_ == 3);

    my ($from,$to,$size) = @_;
    my $text_mode = defined($size) && (ref($size) eq 'CODE' || $size < 0);

    my ($fromsize,$closefrom,$closeto);
    local (*FROM, *TO);

    croak("from undefined") unless (defined $from);
    croak("to undefined") unless (defined $to);

    if (ref($from) &&
        (UNIVERSAL::isa($from,'GLOB') || UNIVERSAL::isa($from,'IO::Handle'))) {
        *FROM = *$from;
    } elsif (ref(\$from) eq 'GLOB') {
        *FROM = $from;
    } else {
        open(FROM,"<",$from) or goto fail_open1;
        unless ($text_mode) {
            binmode FROM;
            $fromsize = -s FROM;
        }
        $closefrom = 1;
    }

    if (ref($to) &&
        (UNIVERSAL::isa($to,'GLOB') || UNIVERSAL::isa($to,'IO::Handle'))) {
        *TO = *$to;
    } elsif (ref(\$to) eq 'GLOB') {
        *TO = $to;
    } else {
        open(TO,"<",$to) or goto fail_open2;
        binmode TO unless $text_mode;
        $closeto = 1;
    }

    if (!$text_mode && $closefrom && $closeto) {
        # If both are opened files we know they differ if their size differ
        goto fail_inner if $fromsize != -s TO;
    }

    if ($text_mode) {
        local $/ = "\n";
        my ($fline,$tline);
        # The readline operators had been lost from these three loops,
        # leaving "$fline = )" which does not parse.
        while (defined($fline = <FROM>)) {
            goto fail_inner unless defined($tline = <TO>);
            if (ref $size) {
                # $size contains ref to comparison function
                goto fail_inner if &$size($fline, $tline);
            } else {
                goto fail_inner if $fline ne $tline;
            }
        }
        # FROM is exhausted: any line left in TO means they differ.
        goto fail_inner if defined($tline = <TO>);
    }
    else {
        unless (defined($size) && $size > 0) {
            $size = $fromsize || -s TO || 0;
            $size = 1024 if $size < 512;
            $size = $Too_Big if $size > $Too_Big;
        }

        my ($fr,$tr,$fbuf,$tbuf);
        $fbuf = $tbuf = '';
        while(defined($fr = read(FROM,$fbuf,$size)) && $fr > 0) {
            unless (defined($tr = read(TO,$tbuf,$fr)) && $tbuf eq $fbuf) {
                goto fail_inner;
            }
        }
        goto fail_inner if defined($tr = read(TO,$tbuf,$size)) && $tr > 0;
    }

    close(TO) || goto fail_open2 if $closeto;
    close(FROM) || goto fail_open1 if $closefrom;

    return 0;

  # All of these contortions try to preserve error messages...
  fail_inner:
    close(TO) || goto fail_open2 if $closeto;
    close(FROM) || goto fail_open1 if $closefrom;

    return 1;

  fail_open2:
    if ($closefrom) {
        # Close FROM but keep the earlier $! unless close sets a new one.
        my $status = $!;
        $! = 0;
        close FROM;
        $! = $status unless $!;
    }
  fail_open1:
    return -1;
}

sub cmp;
*cmp = \&compare;

# compare_text($from, $to [, $cmp])
#
# Line-by-line comparison through compare().  $cmp, when given, must be a
# code reference; otherwise lines are compared with "ne".
sub compare_text {
    my ($from,$to,$cmp) = @_;
    croak("Usage: compare_text( file1, file2 [, cmp-function])")
        unless @_ == 2 || @_ == 3;
    croak("Third arg to compare_text() function must be a code reference")
        if @_ == 3 && ref($cmp) ne 'CODE';

    # Using a negative buffer size puts compare into text_mode too
    $cmp = -1 unless defined $cmp;
    compare($from, $to, $cmp);
}

1;

__END__

=head1 NAME

File::Compare - Compare files or filehandles

=head1 SYNOPSIS

        use File::Compare;

        if (compare("file1","file2") == 0) {
            print "They're equal\n";
        }

=head1 DESCRIPTION

The File::Compare::compare function compares the contents of two
sources, each of which can be a file or a file handle.  It is exported
from File::Compare by default.

File::Compare::cmp is a synonym for File::Compare::compare.  It is
exported from File::Compare only by request.

File::Compare::compare_text does a line by line comparison of the two
files. It stops as soon as a difference is detected. compare_text()
accepts an optional third argument: This must be a CODE reference to
a line comparison function, which returns 0 when both lines are considered
equal.

For example: compare_text($file1, $file2) is basically equivalent to compare_text($file1, $file2, sub {$_[0] ne $_[1]} ) =head1 RETURN File::Compare::compare and its sibling functions return 0 if the files are equal, 1 if the files are unequal, or -1 if an error was encountered. =head1 AUTHOR File::Compare was written by Nick Ing-Simmons. Its original documentation was written by Chip Salzenberg. =cut Copy.pm000064400000044403147633754660006046 0ustar00# File/Copy.pm. Written in 1994 by Aaron Sherman . This # source code has been placed in the public domain by the author. # Please be kind and preserve the documentation. # # Additions copyright 1996 by Charles Bailey. Permission is granted # to distribute the revised code under the same terms as Perl itself. package File::Copy; use 5.006; use strict; use warnings; no warnings 'newline'; use File::Spec; use Config; # During perl build, we need File::Copy but Scalar::Util might not be built yet # And then we need these games to avoid loading overload, as that will # confuse miniperl during the bootstrap of perl. my $Scalar_Util_loaded = eval q{ require Scalar::Util; require overload; 1 }; our(@ISA, @EXPORT, @EXPORT_OK, $VERSION, $Too_Big, $Syscopy_is_copy); sub copy; sub syscopy; sub cp; sub mv; $VERSION = '2.23'; require Exporter; @ISA = qw(Exporter); @EXPORT = qw(copy move); @EXPORT_OK = qw(cp mv); $Too_Big = 1024 * 1024 * 2; sub croak { require Carp; goto &Carp::croak; } sub carp { require Carp; goto &Carp::carp; } # Look up the feature settings on VMS using VMS::Feature when available. my $use_vms_feature = 0; BEGIN { if ($^O eq 'VMS') { if (eval { local $SIG{__DIE__}; require VMS::Feature; }) { $use_vms_feature = 1; } } } # Need to look up the UNIX report mode. This may become a dynamic mode # in the future. 
sub _vms_unix_rpt { my $unix_rpt; if ($use_vms_feature) { $unix_rpt = VMS::Feature::current("filename_unix_report"); } else { my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || ''; $unix_rpt = $env_unix_rpt =~ /^[ET1]/i; } return $unix_rpt; } # Need to look up the EFS character set mode. This may become a dynamic # mode in the future. sub _vms_efs { my $efs; if ($use_vms_feature) { $efs = VMS::Feature::current("efs_charset"); } else { my $env_efs = $ENV{'DECC$EFS_CHARSET'} || ''; $efs = $env_efs =~ /^[ET1]/i; } return $efs; } sub _catname { my($from, $to) = @_; if (not defined &basename) { require File::Basename; import File::Basename 'basename'; } return File::Spec->catfile($to, basename($from)); } # _eq($from, $to) tells whether $from and $to are identical sub _eq { my ($from, $to) = map { $Scalar_Util_loaded && Scalar::Util::blessed($_) && overload::Method($_, q{""}) ? "$_" : $_ } (@_); return '' if ( (ref $from) xor (ref $to) ); return $from == $to if ref $from; return $from eq $to; } sub copy { croak("Usage: copy(FROM, TO [, BUFFERSIZE]) ") unless(@_ == 2 || @_ == 3); my $from = shift; my $to = shift; my $size; if (@_) { $size = shift(@_) + 0; croak("Bad buffer size for copy: $size\n") unless ($size > 0); } my $from_a_handle = (ref($from) ? (ref($from) eq 'GLOB' || UNIVERSAL::isa($from, 'GLOB') || UNIVERSAL::isa($from, 'IO::Handle')) : (ref(\$from) eq 'GLOB')); my $to_a_handle = (ref($to) ? (ref($to) eq 'GLOB' || UNIVERSAL::isa($to, 'GLOB') || UNIVERSAL::isa($to, 'IO::Handle')) : (ref(\$to) eq 'GLOB')); if (_eq($from, $to)) { # works for references, too carp("'$from' and '$to' are identical (not copied)"); # The "copy" was a success as the source and destination contain # the same data. 
return 1; } if ((($Config{d_symlink} && $Config{d_readlink}) || $Config{d_link}) && !($^O eq 'MSWin32' || $^O eq 'os2')) { my @fs = stat($from); if (@fs) { my @ts = stat($to); if (@ts && $fs[0] == $ts[0] && $fs[1] == $ts[1] && !-p $from) { carp("'$from' and '$to' are identical (not copied)"); return 0; } } } if (!$from_a_handle && !$to_a_handle && -d $to && ! -d $from) { $to = _catname($from, $to); } if (defined &syscopy && !$Syscopy_is_copy && !$to_a_handle && !($from_a_handle && $^O eq 'os2' ) # OS/2 cannot handle handles && !($from_a_handle && $^O eq 'mpeix') # and neither can MPE/iX. && !($from_a_handle && $^O eq 'MSWin32') && !($from_a_handle && $^O eq 'NetWare') ) { my $copy_to = $to; if ($^O eq 'VMS' && -e $from) { if (! -d $to && ! -d $from) { my $vms_efs = _vms_efs(); my $unix_rpt = _vms_unix_rpt(); my $unix_mode = 0; my $from_unix = 0; $from_unix = 1 if ($from =~ /^\.\.?$/); my $from_vms = 0; $from_vms = 1 if ($from =~ m#[\[<\]]#); # Need to know if we are in Unix mode. if ($from_vms == $from_unix) { $unix_mode = $unix_rpt; } else { $unix_mode = $from_unix; } # VMS has sticky defaults on extensions, which means that # if there is a null extension on the destination file, it # will inherit the extension of the source file # So add a '.' for a null extension. # In unix_rpt mode, the trailing dot should not be added. if ($vms_efs) { $copy_to = $to; } else { $copy_to = VMS::Filespec::vmsify($to); } my ($vol, $dirs, $file) = File::Spec->splitpath($copy_to); $file = $file . '.' 
unless (($file =~ /(?<!\^)\./) || $unix_mode);
                $copy_to = File::Spec->catpath($vol, $dirs, $file);

                # Get rid of the old versions to be like UNIX
                1 while unlink $copy_to;
            }
        }

        return syscopy($from, $copy_to) || 0;
    }

    my $closefrom = 0;
    my $closeto = 0;
    my ($status, $r, $buf);
    local($\) = '';

    my $from_h;
    if ($from_a_handle) {
        $from_h = $from;
    } else {
        open $from_h, "<", $from or goto fail_open1;
        binmode $from_h or die "($!,$^E)";
        $closefrom = 1;
    }

    # Seems most logical to do this here, in case future changes would want to
    # make this croak for some reason.
    unless (defined $size) {
        $size = tied(*$from_h) ? 0 : -s $from_h || 0;
        $size = 1024 if ($size < 512);
        $size = $Too_Big if ($size > $Too_Big);
    }

    my $to_h;
    if ($to_a_handle) {
        $to_h = $to;
    } else {
        $to_h = \do { local *FH }; # XXX is this line obsolete?
        open $to_h, ">", $to or goto fail_open2;
        binmode $to_h or die "($!,$^E)";
        $closeto = 1;
    }

    $! = 0;
    for (;;) {
        my ($r, $w, $t);
        defined($r = sysread($from_h, $buf, $size))
            or goto fail_inner;
        last unless $r;
        # syswrite may write fewer bytes than asked; keep going from offset $w.
        for ($w = 0; $w < $r; $w += $t) {
            $t = syswrite($to_h, $buf, $r - $w, $w)
                or goto fail_inner;
        }
    }

    close($to_h) || goto fail_open2 if $closeto;
    close($from_h) || goto fail_open1 if $closefrom;

    # Use this idiom to avoid uninitialized value warning.
    return 1;

    # All of these contortions try to preserve error messages...
  fail_inner:
    if ($closeto) {
        $status = $!;
        $! = 0;
        close $to_h;
        $! = $status unless $!;
    }
  fail_open2:
    if ($closefrom) {
        $status = $!;
        $! = 0;
        close $from_h;
        $! = $status unless $!;
    }
  fail_open1:
    return 0;
}

# cp(FROM, TO): copy FROM to TO with copy(), then adjust TO's permission
# bits.  If TO could be stat'ed before the copy, its pre-existing mode is
# the target mode; otherwise FROM's mode masked by the current umask is used.
# The setuid/setgid bits are both dropped when setuid is wanted but the
# owners differ, or when setgid is wanted by a non-root process and the
# groups differ or the process is not in FROM's group.
# Returns 1 on success and 0 when the copy, a stat, or the chmod fails;
# croaks if setuid/setgid bits are wanted but TO cannot be stat'ed.
sub cp {
    my($from,$to) = @_;
    my(@fromstat) = stat $from;
    my(@tostat) = stat $to;
    my $perm;

    return 0 unless copy(@_) and @fromstat;

    if (@tostat) {
        $perm = $tostat[2];
    } else {
        $perm = $fromstat[2] & ~(umask || 0);
        @tostat = stat $to;
    }
    # Might be more robust to look for S_I* in Fcntl, but we're
    # trying to avoid dependence on any XS-containing modules,
    # since File::Copy is used during the Perl build.
    $perm &= 07777;
    if ($perm & 06000) {
        croak("Unable to check setuid/setgid permissions for $to: $!")
            unless @tostat;

        if ($perm & 04000 and                     # setuid
            $fromstat[4] != $tostat[4]) {         # owner must match
            $perm &= ~06000;
        }

        if ($perm & 02000 && $> != 0) {           # if not root, setgid
            my $ok = $fromstat[5] == $tostat[5];  # group must match
            if ($ok) {                            # and we must be in group
                $ok = grep { $_ == $fromstat[5] } split /\s+/, $)
            }
            $perm &= ~06000 unless $ok;
        }
    }
    return 0 unless @tostat;
    return 1 if $perm == ($tostat[2] & 07777);
    return eval { chmod $perm, $to; } ? 1 : 0;
}

# _move(FROM, TO, FALLBACK): shared implementation behind move() and mv().
# Tries rename() first.  If that fails, FALLBACK->(FROM, TO) is called to
# copy the file, the access and modification times are carried over, and
# FROM is unlinked.  Returns 1 on success.  On failure it removes a TO that
# the failed attempt created or changed, restores $! and $^E to the values
# seen right after the failed fallback, and returns 0.
sub _move {
    croak("Usage: move(FROM, TO) ") unless @_ == 3;

    my($from,$to,$fallback) = @_;

    my($fromsz,$tosz1,$tomt1,$tosz2,$tomt2,$sts,$ossts);

    # Moving a non-directory into an existing directory keeps its file name.
    if (-d $to && ! -d $from) {
        $to = _catname($from, $to);
    }

    # Remember TO's size and mtime so a later change can be detected.
    ($tosz1,$tomt1) = (stat($to))[7,9];
    $fromsz = -s $from;
    if ($^O eq 'os2' and defined $tosz1 and defined $fromsz) {
        # will not rename with overwrite
        unlink $to;
    }

    my $rename_to = $to;
    # Compare $^O itself: the string negation "-$^O" yields "-VMS", which
    # never equals 'VMS', so this branch used to be unreachable.
    if ($^O eq 'VMS' && -e $from) {

        if (! -d $to && ! -d $from) {

            my $vms_efs = _vms_efs();
            my $unix_rpt = _vms_unix_rpt();
            my $unix_mode = 0;
            my $from_unix = 0;
            $from_unix = 1 if ($from =~ /^\.\.?$/);
            my $from_vms = 0;
            $from_vms = 1 if ($from =~ m#[\[<\]]#);

            # Need to know if we are in Unix mode.
            if ($from_vms == $from_unix) {
                $unix_mode = $unix_rpt;
            } else {
                $unix_mode = $from_unix;
            }

            # VMS has sticky defaults on extensions, which means that
            # if there is a null extension on the destination file, it
            # will inherit the extension of the source file
            # So add a '.' for a null extension.

            # In unix_rpt mode, the trailing dot should not be added.

            if ($vms_efs) {
                $rename_to = $to;
            } else {
                $rename_to = VMS::Filespec::vmsify($to);
            }
            my ($vol, $dirs, $file) = File::Spec->splitpath($rename_to);
            $file = $file . '.'
                unless (($file =~ /(?<!\^)\./) || $unix_mode);
            $rename_to = File::Spec->catpath($vol, $dirs, $file);

            # Get rid of the old versions to be like UNIX
            1 while unlink $rename_to;
        }
    }

    return 1 if rename $from, $rename_to;

    # Did rename return an error even though it succeeded, because $to
    # is on a remote NFS file system, and NFS lost the server's ack?
    return 1 if defined($fromsz) && !-e $from &&           # $from disappeared
                (($tosz2,$tomt2) = (stat($to))[7,9]) &&    # $to's there
                  ((!defined $tosz1) ||                    #  not before or
                   ($tosz1 != $tosz2 or $tomt1 != $tomt2)) &&  #   was changed
                $tosz2 == $fromsz;                         # it's all there

    ($tosz1,$tomt1) = (stat($to))[7,9];  # just in case rename did something

    {
        local $@;
        eval {
            local $SIG{__DIE__};
            $fallback->($from,$to) or die;
            my($atime, $mtime) = (stat($from))[8,9];
            utime($atime, $mtime, $to);
            unlink($from) or die;
        };
        return 1 unless $@;
    }
    ($sts,$ossts) = ($! + 0, $^E + 0);

    ($tosz2,$tomt2) = ((stat($to))[7,9],0,0) if defined $tomt1;
    unlink($to) if !defined($tomt1) or $tomt1 != $tomt2 or $tosz1 != $tosz2;
    ($!,$^E) = ($sts,$ossts);
    return 0;
}

# move(FROM, TO): _move() with copy() as the copy-and-delete fallback.
sub move { _move(@_,\&copy); }

# mv(FROM, TO): _move() with cp() as the copy-and-delete fallback.
sub mv   { _move(@_,\&cp);   }

# &syscopy is an XSUB under OS/2
unless (defined &syscopy) {
    if ($^O eq 'VMS') {
        *syscopy = \&rmscopy;
    } elsif ($^O eq 'mpeix') {
        *syscopy = sub {
            return 0 unless @_ == 2;
            # Use the MPE cp program in order to
            # preserve MPE file attributes.
return system('/bin/cp', '-f', $_[0], $_[1]) == 0;
        };
    } elsif ($^O eq 'MSWin32' && defined &DynaLoader::boot_DynaLoader) {
        # Win32::CopyFile() will only work if we can load Win32.xs
        *syscopy = sub {
            return 0 unless @_ == 2;
            return Win32::CopyFile(@_, 1);
        };
    } else {
        # No system-level copy is available here: syscopy is just copy(),
        # and the flag tells copy() not to dispatch back to syscopy.
        $Syscopy_is_copy = 1;
        *syscopy = \&copy;
    }
}

1;

__END__

=head1 NAME

File::Copy - Copy files or filehandles

=head1 SYNOPSIS

	use File::Copy;

	copy("file1","file2") or die "Copy failed: $!";
	copy("Copy.pm",\*STDOUT);
	move("/dev1/fileA","/dev2/fileB");

	use File::Copy "cp";

	$n = FileHandle->new("/a/file","r");
	cp($n,"x");

=head1 DESCRIPTION

The File::Copy module provides two basic functions, C<copy> and
C<move>, which are useful for getting the contents of a file from
one place to another.

=over 4

=item copy
X<copy> X<cp>

The C<copy> function takes two
parameters: a file to copy from and a file to copy to. Either
argument may be a string, a FileHandle reference or a FileHandle
glob. Obviously, if the first argument is a filehandle of some
sort, it will be read from, and if it is a file I<name> it will
be opened for reading. Likewise, the second argument will be
written to (and created if need be).  Trying to copy a file on top
of itself is an error.

If the destination (second argument) already exists and is a directory,
and the source (first argument) is not a filehandle, then the source
file will be copied into the directory specified by the destination,
using the same base name as the source file.  It's a failure to have a
filehandle as the source when the destination is a directory.

B<Note that passing in
files as handles instead of names may lead to loss of information
on some operating systems; it is recommended that you use file
names whenever possible.>  Files are opened in binary mode where
applicable.  To get a consistent behaviour when copying from a
filehandle to a file, use C<binmode> on the filehandle.

An optional third parameter can be used to specify the buffer
size used for copying. This is the number of bytes from the
first file, that will be held in memory at any given time, before
being written to the second file.
The default buffer size depends
upon the file, but will generally be the whole file (up to 2MB), or
1k for filehandles that do not reference files (e.g. sockets).

You may use the syntax C<use File::Copy "cp"> to get at the C<cp>
alias for this function. The syntax is I<exactly> the same.  The
behavior is nearly the same as well: as of version 2.15, C<cp> will
preserve the source file's permission bits like the shell utility
C<cp(1)> would do, while C<copy> uses the default permissions for the
target file (which may depend on the process' C<umask>, file
ownership, inherited ACLs, etc.).  If an error occurs in setting
permissions, C<cp> will return 0, regardless of whether the file was
successfully copied.

=item move
X<move> X<mv> X<rename>

The C<move> function also takes two parameters: the current name
and the intended name of the file to be moved.  If the destination
already exists and is a directory, and the source is not a
directory, then the source file will be renamed into the directory
specified by the destination.

If possible, move() will simply rename the file.  Otherwise, it copies
the file to the new location and deletes the original.  If an error occurs
during this copy-and-delete process, you may be left with a (possibly partial)
copy of the file under the destination name.

You may use the C<mv> alias for this function in the same way that
you may use the C<cp> alias for C<copy>.

=item syscopy
X<syscopy>

File::Copy also provides the C<syscopy> routine, which copies the
file specified in the first parameter to the file specified in the
second parameter, preserving OS-specific attributes and file
structure.  For Unix systems, this is equivalent to the simple
C<copy> routine, which doesn't preserve OS-specific attributes.  For
VMS systems, this calls the C<rmscopy> routine (see below).  For OS/2
systems, this calls the C<syscopy> XSUB directly. For Win32 systems,
this calls C<Win32::CopyFile>.
B is defined (OS/2, VMS and Win32)>: If both arguments to C are not file handles, then C will perform a "system copy" of the input file to a new output file, in order to preserve file attributes, indexed file structure, I The buffer size parameter is ignored. If either argument to C is a handle to an opened file, then data is copied using Perl operators, and no effort is made to preserve file attributes or record structure. The system copy routine may also be called directly under VMS and OS/2 as C (or under VMS as C, which is the routine that does the actual work for syscopy). =item rmscopy($from,$to[,$date_flag]) X The first and second arguments may be strings, typeglobs, typeglob references, or objects inheriting from IO::Handle; they are used in all cases to obtain the I of the input and output files, respectively. The name and type of the input file are used as defaults for the output file, if necessary. A new version of the output file is always created, which inherits the structure and RMS attributes of the input file, except for owner and protections (and possibly timestamps; see below). All data from the input file is copied to the output file; if either of the first two parameters to C is a file handle, its position is unchanged. (Note that this means a file handle pointing to the output file will be associated with an old version of that file after C returns, not the newly created version.) The third parameter is an integer flag, which tells C how to handle timestamps. If it is E 0, none of the input file's timestamps are propagated to the output file. If it is E 0, then it is interpreted as a bitmask: if bit 0 (the LSB) is set, then timestamps other than the revision date are propagated; if bit 1 is set, the revision date is propagated. 
If the third parameter to C is 0, then it behaves much like the DCL COPY command: if the name or type of the output file was explicitly specified, then no timestamps are propagated, but if they were taken implicitly from the input filespec, then all timestamps other than the revision date are propagated. If this parameter is not supplied, it defaults to 0. Like C, C returns 1 on success. If an error occurs, it sets C<$!>, deletes the output file, and returns 0. =back =head1 RETURN All functions return 1 on success, 0 on failure. $! will be set if an error was encountered. =head1 AUTHOR File::Copy was written by Aaron Sherman Iajs@ajs.comE> in 1995, and updated by Charles Bailey Ibailey@newman.upenn.eduE> in 1996. =cut DosGlob.pm000064400000017712147633754660006470 0ustar00#!perl -w # use strict fails #Can't use string ("main::glob") as a symbol ref while "strict refs" in use at /usr/lib/perl5/5.005/File/DosGlob.pm line 191. # # Documentation at the __END__ # package File::DosGlob; our $VERSION = '1.06'; use strict; use warnings; sub doglob { my $cond = shift; my @retval = (); my $fix_drive_relative_paths; #print "doglob: ", join('|', @_), "\n"; OUTER: for my $pat (@_) { my @matched = (); my @globdirs = (); my $head = '.'; my $sepchr = '/'; my $tail; next OUTER unless defined $pat and $pat ne ''; # if arg is within quotes strip em and do no globbing if ($pat =~ /^"(.*)"\z/s) { $pat = $1; if ($cond eq 'd') { push(@retval, $pat) if -d $pat } else { push(@retval, $pat) if -e $pat } next OUTER; } # wildcards with a drive prefix such as h:*.pm must be changed # to h:./*.pm to expand correctly if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) { substr($pat,0,2) = $1 . 
"./";
            # Remember to strip the inserted "./" from the results at the end.
            $fix_drive_relative_paths = 1;
        }
        # Split the pattern at its last separator into directory part ($head),
        # the separator actually used, and the final component ($tail).
        if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) {
            ($head, $sepchr, $tail) = ($1,$2,$3);
            #print "div: |$head|$sepchr|$tail|\n";
            # A pattern that ends in a separator is returned unchanged.
            push (@retval, $pat), next OUTER if $tail eq '';
            # Wildcards in the directory part: expand the directories first
            # (recursive call in 'd' mode), then glob $tail inside each one.
            # If no directory matched, processing continues below with the
            # unexpanded $head.
            if ($head =~ /[*?]/) {
                @globdirs = doglob('d', $head);
                push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)),
                    next OUTER if @globdirs;
            }
            # Keep the separator on a root ('') or bare drive ('X:') head.
            $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s;
            $pat = $tail;
        }
        #
        # If file component has no wildcards, we can avoid opendir
        unless ($pat =~ /[*?]/) {
            $head = '' if $head eq '.';
            $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
            $head .= $pat;
            if ($cond eq 'd') { push(@retval,$head) if -d $head }
            else              { push(@retval,$head) if -e $head }
            next OUTER;
        }
        # An unreadable directory contributes no matches.
        opendir(D, $head) or next OUTER;
        my @leaves = readdir D;
        closedir D;
        $head = '' if $head eq '.';
        $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;

        # escape regex metachars but not glob chars
        $pat =~ s:([].+^\-\${}()[|]):\\$1:g;
        # and convert DOS-style wildcards to regex
        $pat =~ s/\*/.*/g;
        $pat =~ s/\?/.?/g;

        #print "regex: '$pat', head: '$head'\n";
        # Whole-name, case-insensitive match against the converted pattern.
        my $matchsub = sub { $_[0] =~ m|^$pat\z|is };
      INNER:
        for my $e (@leaves) {
            next INNER if $e eq '.' or $e eq '..';
            next INNER if $cond eq 'd' and ! -d "$head$e";
            push(@matched, "$head$e"), next INNER if &$matchsub($e);
            #
            # [DOS compatibility special case]
            # Failed, add a trailing dot and try again, but only
            # if name does not have a dot in it *and* pattern
            # has a dot *and* name is shorter than 9 chars.
            #
            if (index($e,'.') == -1 and length($e) < 9
                and index($pat,'\\.') != -1) {
                push(@matched, "$head$e"), next INNER if &$matchsub("$e.");
            }
        }
        push @retval, @matched if @matched;
    }
    # Undo the "X:" -> "X:./" rewrite applied to drive-relative patterns.
    if ($fix_drive_relative_paths) {
        s|^([A-Za-z]:)\./|$1| for @retval;
    }
    return @retval;
}

#
# this can be used to override CORE::glob in a specific
# package by saying C<use File::DosGlob 'glob';> in that
# namespace.
# # context (keyed by second cxix arg provided by core) my %entries; sub glob { my($pat,$cxix) = @_; my @pat; # glob without args defaults to $_ $pat = $_ unless defined $pat; # assume global context if not provided one $cxix = '_G_' unless defined $cxix; # if we're just beginning, do it all first if (!$entries{$cxix}) { # extract patterns if ($pat =~ /\s/) { require Text::ParseWords; @pat = Text::ParseWords::parse_line('\s+',0,$pat); } else { push @pat, $pat; } # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3. # abc3 will be the original {3} (and drop the {}). # abc1 abc2 will be put in @appendpat. # This was just the easiest way, not nearly the best. REHASH: { my @appendpat = (); for (@pat) { # There must be a "," I.E. abc{efg} is not what we want. while ( /^(.*)(?; # from the command line (overrides only in main::) > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>" =head1 DESCRIPTION A module that implements DOS-like globbing with a few enhancements. It is largely compatible with perlglob.exe (the M$ setargv.obj version) in all but one respect--it understands wildcards in directory components. For example, C<<..\\l*b\\file/*glob.p?>> will work as expected (in that it will find something like '..\lib\File/DosGlob.pm' alright). Note that all path components are case-insensitive, and that backslashes and forward slashes are both accepted, and preserved. You may have to double the backslashes if you are putting them in literally, due to double-quotish parsing of the pattern by perl. Spaces in the argument delimit distinct patterns, so C globs all filenames that end in C<.exe> or C<.dll>. If you want to put in literal spaces in the glob pattern, you can escape them with either double quotes, or backslashes. e.g. C, or C. The argument is tokenized using C, so see L for details of the quoting rules used. Extending it to csh patterns is left as an exercise to the reader. 
=head1 EXPORTS (by request only) glob() =head1 BUGS Should probably be built into the core, and needs to stop pandering to DOS habits. Needs a dose of optimizium too. =head1 AUTHOR Gurusamy Sarathy =head1 HISTORY =over 4 =item * Support for globally overriding glob() (GSAR 3-JUN-98) =item * Scalar context, independent iterator context fixes (GSAR 15-SEP-97) =item * A few dir-vs-file optimizations result in glob importation being 10 times faster than using perlglob.exe, and using perlglob.bat is only twice as slow as perlglob.exe (GSAR 28-MAY-97) =item * Several cleanups prompted by lack of compatible perlglob.exe under Borland (GSAR 27-MAY-97) =item * Initial version (GSAR 20-FEB-97) =back =head1 SEE ALSO perl perlglob.bat Text::ParseWords =cut stat.pm000064400000022706147633754660006111 0ustar00package File::stat; use 5.006; use strict; use warnings; use warnings::register; use Carp; BEGIN { *warnif = \&warnings::warnif } our(@EXPORT, @EXPORT_OK, %EXPORT_TAGS); our $VERSION = '1.05'; my @fields; BEGIN { use Exporter (); @EXPORT = qw(stat lstat); @fields = qw( $st_dev $st_ino $st_mode $st_nlink $st_uid $st_gid $st_rdev $st_size $st_atime $st_mtime $st_ctime $st_blksize $st_blocks ); @EXPORT_OK = ( @fields, "stat_cando" ); %EXPORT_TAGS = ( FIELDS => [ @fields, @EXPORT ] ); } use vars @fields; use Fcntl qw(S_IRUSR S_IWUSR S_IXUSR); BEGIN { # These constants will croak on use if the platform doesn't define # them. It's important to avoid inflicting that on the user. no strict 'refs'; for (qw(suid sgid svtx)) { my $val = eval { &{"Fcntl::S_I\U$_"} }; *{"_$_"} = defined $val ? sub { $_[0] & $val ? 1 : "" } : sub { "" }; } for (qw(SOCK CHR BLK REG DIR FIFO LNK)) { *{"S_IS$_"} = defined eval { &{"Fcntl::S_IF$_"} } ? \&{"Fcntl::S_IS$_"} : sub { "" }; } } # from doio.c sub _ingroup { my ($gid, $eff) = @_; # I am assuming that since VMS doesn't have getgroups(2), $) will # always only contain a single entry. 
$^O eq "VMS" and return $_[0] == $);

    # $) and $( are space-separated lists; the first entry is the effective
    # (resp. real) gid, the rest of $) are supplementary groups.
    my ($egid, @supp) = split " ", $);
    my ($rgid)        = split " ", $(;

    $gid == ($eff ? $egid : $rgid)  and return 1;
    grep $gid == $_, @supp          and return 1;

    return "";
}

# VMS uses the Unix version of the routine, even though this is very
# suboptimal. VMS has a permissions structure that doesn't really fit
# into struct stat, and unlike on Win32 the normal -X operators respect
# that, but unfortunately by the time we get here we've already lost the
# information we need. It looks to me as though if we were to preserve
# the st_devnam entry of vmsish.h's fake struct stat (which actually
# holds the filename) it might be possible to do this right, but both
# getting that value out of the struct (perl's stat doesn't return it)
# and interpreting it later would require this module to have an XS
# component (at which point we might as well just call Perl_cando and
# have done with it).

# cando(STAT, MODE, EFF): STAT is an array ref whose elements [2], [4] and
# [5] are the mode, uid and gid; MODE is an owner-class permission bit
# (e.g. S_IRUSR); EFF selects effective (true) or real (false) ids.
# Returns 1 if the access is allowed and "" otherwise.
if (grep $^O eq $_, qw/os2 MSWin32 dos/) {

    # from doio.c
    *cando = sub { ($_[0][2] & $_[1]) ? 1 : "" };
}
else {

    # from doio.c
    *cando = sub {
        my ($s, $mode, $eff) = @_;
        my $uid = $eff ? $> : $<;

        # If we're root on unix and we are not testing for executable
        # status, then all file tests are true.
        $^O ne "VMS" and $uid == 0 and !($mode & 0111) and return 1;

        my ($stmode, $stuid, $stgid) = @$s[2,4,5];

        # This code basically assumes that the rwx bits of the mode are
        # the 0777 bits, but so does Perl_cando.
        if ($stuid == $uid) {
            $stmode & $mode         and return 1;
        }
        elsif (_ingroup($stgid, $eff)) {
            # Shift the owner-class bit down to the group class.
            $stmode & ($mode >> 3)  and return 1;
        }
        else {
            # Shift the owner-class bit down to the "other" class.
            $stmode & ($mode >> 6)  and return 1;
        }
        return "";
    };
}

# alias for those who don't like objects
*stat_cando = \&cando;

# Dispatch table for the overloaded filetest operators: one closure per
# supported letter, each taking the stat object (an array ref laid out in
# the field order given to struct() below: [2] mode, [4] uid, [7] size,
# [8] atime, [9] mtime, [10] ctime).
my %op = (
    r => sub { cando($_[0], S_IRUSR, 1) },
    w => sub { cando($_[0], S_IWUSR, 1) },
    x => sub { cando($_[0], S_IXUSR, 1) },
    o => sub { $_[0][4] == $>           },

    R => sub { cando($_[0], S_IRUSR, 0) },
    W => sub { cando($_[0], S_IWUSR, 0) },
    X => sub { cando($_[0], S_IXUSR, 0) },
    O => sub { $_[0][4] == $<           },

    e => sub { 1 },
    z => sub { $_[0][7] == 0    },
    s => sub { $_[0][7]         },

    f => sub { S_ISREG ($_[0][2]) },
    d => sub { S_ISDIR ($_[0][2]) },
    l => sub { S_ISLNK ($_[0][2]) },
    p => sub { S_ISFIFO($_[0][2]) },
    S => sub { S_ISSOCK($_[0][2]) },
    b => sub { S_ISBLK ($_[0][2]) },
    c => sub { S_ISCHR ($_[0][2]) },

    u => sub { _suid($_[0][2]) },
    g => sub { _sgid($_[0][2]) },
    k => sub { _svtx($_[0][2]) },

    M => sub { ($^T - $_[0][9] ) / 86400 },
    C => sub { ($^T - $_[0][10]) / 86400 },
    A => sub { ($^T - $_[0][8] ) / 86400 },
);

use constant HINT_FILETEST_ACCESS => 0x00400000;

# we need fallback=>1 or stringifying breaks
use overload
    fallback => 1,
    -X => sub {
        my ($s, $op) = @_;

        # Only the access tests get these warnings.  index() returns -1
        # (true) when $op is absent and 0 (false) for "r", so its result
        # must be compared explicitly rather than used as a boolean.
        if (index("rwxRWX", $op) >= 0) {
            (caller 0)[8] & HINT_FILETEST_ACCESS
                and warnif("File::stat ignores use filetest 'access'");

            $^O eq "VMS"
                and warnif("File::stat ignores VMS ACLs");

            # It would be nice to have a warning about using -l on a
            # non-lstat, but that would require an extra member in the
            # object.
        }

        if ($op{$op}) {
            return $op{$op}->($_[0]);
        }
        else {
            croak "-$op is not implemented on a File::stat object";
        }
    };

# Class::Struct forbids use of @ISA
sub import { goto &Exporter::import }

use Class::Struct qw(struct);
struct 'File::stat' => [
     map { $_ => '$' } qw{
	 dev ino mode nlink uid gid rdev size
	 atime mtime ctime blksize blocks
     }
];

# populate(LIST): build a File::stat object from the list returned by
# CORE::stat/CORE::lstat, also copying the values into the $st_* package
# variables.  Returns nothing when given an empty list (the stat failed).
sub populate (@) {
    return unless @_;
    my $stob = new();
    @$stob = (
	$st_dev, $st_ino, $st_mode, $st_nlink, $st_uid, $st_gid, $st_rdev,
        $st_size, $st_atime, $st_mtime, $st_ctime, $st_blksize, $st_blocks )
	    = @_;
    return $stob;
}

# lstat(FILE): object-returning replacement for CORE::lstat.
sub lstat ($)  { populate(CORE::lstat(shift)) }

# stat(FILE_OR_HANDLE): object-returning replacement for CORE::stat.
# If stat on the argument fails, the argument is retried as the name of a
# filehandle qualified into the caller's package; returns nothing when that
# handle has no file descriptor.
sub stat ($) {
    my $arg = shift;
    my $st = populate(CORE::stat $arg);
    return $st if defined $st;
	my $fh;
    {
		local $!;
		no strict 'refs';
		require Symbol;
		$fh = \*{ Symbol::qualify( $arg, caller() )};
		return unless defined fileno $fh;
	}
    return populate(CORE::stat $fh);
}

1;
__END__

=head1 NAME

File::stat - by-name interface to Perl's built-in stat() functions

=head1 SYNOPSIS

 use File::stat;
 $st = stat($file) or die "No $file: $!";
 if ( ($st->mode & 0111) && ($st->nlink > 1) ) {
     print "$file is executable with lotsa links\n";
 }

 if ( -x $st ) {
     print "$file is executable\n";
 }

 use Fcntl "S_IRUSR";
 if ( $st->cando(S_IRUSR, 1) ) {
     print "My effective uid can read $file\n";
 }

 use File::stat qw(:FIELDS);
 stat($file) or die "No $file: $!";
 if ( ($st_mode & 0111) && ($st_nlink > 1) ) {
     print "$file is executable with lotsa links\n";
 }

=head1 DESCRIPTION

This module's default exports override the core stat()
and lstat() functions, replacing them with versions that return
"File::stat" objects.  This object has methods that
return the similarly named structure field name from the
stat(2) function; namely,
dev,
ino,
mode,
nlink,
uid,
gid,
rdev,
size,
atime,
mtime,
ctime,
blksize,
and
blocks.

As of version 1.02 (provided with perl 5.12) the object provides C<"-X">
overloading, so you can call filetest operators (C<-f>, C<-x>, and so
on) on it.
It also provides a C<< ->cando >> method, called like $st->cando( ACCESS, EFFECTIVE ) where I is one of C, C or C from the L module, and I indicates whether to use effective (true) or real (false) ids. The method interprets the C, C and C fields, and returns whether or not the current process would be allowed the specified access. If you don't want to use the objects, you may import the C<< ->cando >> method into your namespace as a regular function called C. This takes an arrayref containing the return values of C or C as its first argument, and interprets it for you. You may also import all the structure fields directly into your namespace as regular variables using the :FIELDS import tag. (Note that this still overrides your stat() and lstat() functions.) Access these fields as variables named with a preceding C in front their method names. Thus, C<$stat_obj-Edev()> corresponds to $st_dev if you import the fields. To access this functionality without the core overrides, pass the C an empty import list, and then access function functions with their full qualified names. On the other hand, the built-ins are still available via the C pseudo-package. =head1 BUGS As of Perl 5.8.0 after using this module you cannot use the implicit C<$_> or the special filehandle C<_> with stat() or lstat(), trying to do so leads into strange errors. The workaround is for C<$_> to be explicit my $stat_obj = stat $_; and for C<_> to explicitly populate the object using the unexported and undocumented populate() function with CORE::stat(): my $stat_obj = File::stat::populate(CORE::stat(_)); =head1 ERRORS =over 4 =item -%s is not implemented on a File::stat object The filetest operators C<-t>, C<-T> and C<-B> are not implemented, as they require more information than just a stat buffer. 
=back =head1 WARNINGS These can all be disabled with no warnings "File::stat"; =over 4 =item File::stat ignores use filetest 'access' You have tried to use one of the C<-rwxRWX> filetests with C in effect. C will ignore the pragma, and just use the information in the C member as usual. =item File::stat ignores VMS ACLs VMS systems have a permissions structure that cannot be completely represented in a stat buffer, and unlike on other systems the builtin filetest operators respect this. The C overloads, however, do not, since the information required is not available. =back =head1 NOTE While this class is currently implemented using the Class::Struct module to build a struct-like class, you shouldn't rely upon this. =head1 AUTHOR Tom Christiansen Find.pm000064400000100120147633754660006001 0ustar00package File::Find; use 5.006; use strict; use warnings; use warnings::register; our $VERSION = '1.20'; require Exporter; require Cwd; # # Modified to ensure sub-directory traversal order is not inverted by stack # push and pops. That is remains in the same order as in the directory file, # or user pre-processing (EG:sorted). # =head1 NAME File::Find - Traverse a directory tree. =head1 SYNOPSIS use File::Find; find(\&wanted, @directories_to_search); sub wanted { ... } use File::Find; finddepth(\&wanted, @directories_to_search); sub wanted { ... } use File::Find; find({ wanted => \&process, follow => 1 }, '.'); =head1 DESCRIPTION These are functions for searching through directory trees doing work on each file found similar to the Unix I command. File::Find exports two functions, C and C. They work similarly but have subtle differences. =over 4 =item B find(\&wanted, @directories); find(\%options, @directories); C does a depth-first search over the given C<@directories> in the order they are given. For each file or directory found, it calls the C<&wanted> subroutine. (See below for details on how to use the C<&wanted> function). 
Additionally, for each directory found, it will C into that directory and continue the search, invoking the C<&wanted> function on each file or subdirectory in the directory. =item B finddepth(\&wanted, @directories); finddepth(\%options, @directories); C works just like C except that it invokes the C<&wanted> function for a directory I invoking it for the directory's contents. It does a postorder traversal instead of a preorder traversal, working from the bottom of the directory tree up where C works from the top of the tree down. =back =head2 %options The first argument to C is either a code reference to your C<&wanted> function, or a hash reference describing the operations to be performed for each file. The code reference is described in L below. Here are the possible keys for the hash: =over 3 =item C The value should be a code reference. This code reference is described in L below. The C<&wanted> subroutine is mandatory. =item C Reports the name of a directory only AFTER all its entries have been reported. Entry point C is a shortcut for specifying C<< { bydepth => 1 } >> in the first argument of C. =item C The value should be a code reference. This code reference is used to preprocess the current directory. The name of the currently processed directory is in C<$File::Find::dir>. Your preprocessing function is called after C, but before the loop that calls the C function. It is called with a list of strings (actually file/directory names) and is expected to return a list of strings. The code can be used to sort the file/directory names alphabetically, numerically, or to filter out directory entries based on their name alone. When I or I are in effect, C is a no-op. =item C The value should be a code reference. It is invoked just before leaving the currently processed directory. It is called in void context with no arguments. The name of the current directory is in C<$File::Find::dir>. 
This hook is handy for summarizing a directory, such as calculating its disk usage. When I or I are in effect, C is a no-op. =item C Causes symbolic links to be followed. Since directory trees with symbolic links (followed) may contain files more than once and may even have cycles, a hash has to be built up with an entry for each file. This might be expensive both in space and time for a large directory tree. See L and L below. If either I or I is in effect: =over 6 =item * It is guaranteed that an I has been called before the user's C function is called. This enables fast file checks involving S<_>. Note that this guarantee no longer holds if I or I are not set. =item * There is a variable C<$File::Find::fullname> which holds the absolute pathname of the file with all symbolic links resolved. If the link is a dangling symbolic link, then fullname will be set to C. =back This is a no-op on Win32. =item C This is similar to I except that it may report some files more than once. It does detect cycles, however. Since only symbolic links have to be hashed, this is much cheaper both in space and time. If processing a file more than once (by the user's C function) is worse than just taking time, the option I should be used. This is also a no-op on Win32. =item C C, which is the default, causes all files which are neither directories nor symbolic links to be ignored if they are about to be processed a second time. If a directory or a symbolic link are about to be processed a second time, File::Find dies. C causes File::Find to die if any file is about to be processed a second time. C causes File::Find to ignore any duplicate files and directories but to proceed normally otherwise. =item C If true and a code reference, will be called with the symbolic link name and the directory it lives in as arguments. Otherwise, if true and warnings are on, warning "symbolic_link_name is a dangling symbolic link\n" will be issued. 
If false, the dangling symbolic link will be silently ignored. =item C Does not C to each directory as it recurses. The C function will need to be aware of this, of course. In this case, C<$_> will be the same as C<$File::Find::name>. =item C If find is used in taint-mode (-T command line switch or if EUID != UID or if EGID != GID) then internally directory names have to be untainted before they can be chdir'ed to. Therefore they are checked against a regular expression I. Note that all names passed to the user's I function are still tainted. If this option is used while not in taint-mode, C is a no-op. =item C See above. This should be set using the C quoting operator. The default is set to C. Note that the parentheses are vital. =item C If set, a directory which fails the I is skipped, including all its sub-directories. The default is to 'die' in such a case. =back =head2 The wanted function The C function does whatever verifications you want on each file and directory. Note that despite its name, the C function is a generic callback function, and does B tell File::Find if a file is "wanted" or not. In fact, its return value is ignored. The wanted function takes no arguments but rather does its work through a collection of variables. =over 4 =item C<$File::Find::dir> is the current directory name, =item C<$_> is the current filename within that directory =item C<$File::Find::name> is the complete pathname to the file. =back The above variables have all been localized and may be changed without affecting data outside of the wanted function. For example, when examining the file F you will have: $File::Find::dir = /some/path/ $_ = foo.ext $File::Find::name = /some/path/foo.ext You are chdir()'d to C<$File::Find::dir> when the function is called, unless C was specified. 
Note that when changing to directories is in effect the root directory (F) is a somewhat special case inasmuch as the concatenation of C<$File::Find::dir>, C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The table below summarizes all variants: $File::Find::name $File::Find::dir $_ default / / . no_chdir=>0 /etc / etc /etc/x /etc x no_chdir=>1 / / / /etc / /etc /etc/x /etc /etc/x When C or C are in effect, there is also a C<$File::Find::fullname>. The function may set C<$File::Find::prune> to prune the tree unless C was specified. Unless C or C is specified, for compatibility reasons (find.pl, find2perl) there are in addition the following globals available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>, C<$File::Find::topmode> and C<$File::Find::topnlink>. This library is useful for the C tool, which when fed, find2perl / -name .nfs\* -mtime +7 \ -exec rm -f {} \; -o -fstype nfs -prune produces something like: sub wanted { /^\.nfs.*\z/s && (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && int(-M _) > 7 && unlink($_) || ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && $dev < 0 && ($File::Find::prune = 1); } Notice the C<_> in the above C: the C<_> is a magical filehandle that caches the information from the preceding C, C, or filetest. Here's another interesting wanted function. It will find all symbolic links that don't resolve: sub wanted { -l && !-e && print "bogus link: $File::Find::name\n"; } See also the script C on CPAN for a nice application of this module. =head1 WARNINGS If you run your program with the C<-w> switch, or if you use the C pragma, File::Find will report warnings for several weird situations. You can disable these warnings by putting the statement no warnings 'File::Find'; in the appropriate scope. See L for more info about lexical warnings. 
=head1 CAVEAT

=over 2

=item $dont_use_nlink

You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
force File::Find to always stat directories. This was used for file systems
that do not have an C<nlink> count matching the number of sub-directories.
Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
system) and a couple of others.

You shouldn't need to set this variable, since File::Find should now detect
such file systems on-the-fly and switch itself to using stat. This works even
for parts of your file system, like a mounted CD-ROM.

If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.

=item symlinks

Be aware that the option to follow symbolic links can be dangerous.
Depending on the structure of the directory tree (including symbolic
links to directories) you might traverse a given (physical) directory
more than once (only if C<follow_fast> is in effect).
Furthermore, deleting or changing files in a symbolically linked directory
might cause very unpleasant surprises, since you delete or change files
in an unknown directory.

=back

=head1 BUGS AND CAVEATS

Despite the name of the C<finddepth()> function, both C<find()> and
C<finddepth()> perform a depth-first search of the directory
hierarchy.

=head1 HISTORY

File::Find used to produce incorrect results if called recursively.
During the development of perl 5.8 this bug was fixed.
The first fixed version of File::Find was 1.01.

=head1 SEE ALSO

find, find2perl.

=cut

our @ISA = qw(Exporter);
our @EXPORT = qw(find finddepth);

use strict;
my $Is_VMS;
my $Is_Win32;

require File::Basename;
require File::Spec;

# Should ideally be my() not our() but local() currently
# refuses to operate on lexicals

our %SLnkSeen;
our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
    $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
    $pre_process, $post_process, $dangling_symlinks);

# contract_name($cdir, $fn)
#
# Resolve $fn relative to the directory part of $cdir (everything up to
# the last '/').  When $fn is the current-directory token the directory
# part itself is returned, without its trailing slash.  A leading "./"
# is dropped from $fn, and when $fn starts with "../" every
# "/component/../" sequence in the result is collapsed.
sub contract_name {
    my ($cdir, $fn) = @_;

    my $last_slash = rindex($cdir, '/');

    if ($fn eq $File::Find::current_dir) {
        return substr($cdir, 0, $last_slash);
    }

    (my $relative = $fn) =~ s|^\./||;

    my $abs_name = substr($cdir, 0, $last_slash + 1) . $relative;

    if (substr($relative, 0, 3) eq '../') {
        1 while $abs_name =~ s!/[^/]*/\.\./+!/!;
    }

    return $abs_name;
}

# PathCombine($Base, $Name)
#
# Combine the link location $Base with the link target $Name.  Absolute
# targets are used unchanged; relative ones go through contract_name().
# Returns undef when the result is $Base itself or one of its leading
# directories (a simple recursion check), otherwise the combined name.
sub PathCombine($$) {
    my ($Base, $Name) = @_;

    my $combined = substr($Name, 0, 1) eq '/'
        ? $Name
        : contract_name($Base, $Name);

    # (simple) check for recursion
    my $combined_len = length($combined);
    my $base_len     = length($Base);
    return $combined if $combined_len > $base_len;

    my $ends_on_component = $combined_len == $base_len
        || substr($Base, $combined_len, 1) eq '/';
    if ($ends_on_component && $combined eq substr($Base, 0, $combined_len)) {
        return undef;
    }

    return $combined;
}

# Follow_SymLink($AbsName)
#
# Follow a chain of symbolic links starting at $AbsName, recording each
# (device, inode) pair in %SLnkSeen.  Returns the final name, or undef
# for a dangling link or an entry that is skipped because it was seen
# before; dies instead of skipping when $follow_skip is low enough.
# The stat buffer "_" is left describing the last lstat() performed.
sub Follow_SymLink($) {
    my ($AbsName) = @_;

    my ($dev, $ino) = lstat $AbsName;

    while (-l _) {
        if ($SLnkSeen{$dev, $ino}++) {
            die "$AbsName is encountered a second time" if $follow_skip < 2;
            return undef;
        }

        my $target = PathCombine($AbsName, readlink($AbsName));
        if (!defined $target) {
            die "$AbsName is a recursive symbolic link" if $follow_skip < 2;
            return undef;
        }

        $AbsName = $target;
        ($dev, $ino) = lstat($AbsName);
        return undef unless defined $dev;  # dangling symbolic link
    }

    if ($full_check && defined $dev && $SLnkSeen{$dev, $ino}++) {
        if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
            die "$AbsName encountered a second time";
        }
        return undef;
    }

    return $AbsName;
}

our($dir, $name, $fullname, $prune);
sub _find_dir_symlnk($$$);
sub _find_dir($$$);

# check whether or not a scalar
# variable is tainted
# (code straight from the Camel, 3rd ed., page 561)
sub is_tainted_pp {
    my $arg = shift;
    my $nada = substr($arg, 0, 0); # zero-length
    local $@;
    eval { eval "# $nada" };
    return length($@) != 0;
}

# _find_opt($wanted, @top_items)
#
# Common driver behind find() and finddepth().  $wanted is the option
# hash produced by wrap_wanted().  The options are copied into the
# (localized) package globals, then each top-level item is processed:
# directories are handed to _find_dir() or _find_dir_symlnk(), anything
# else is reported to the wanted callback directly.  Unless no_chdir is
# set, the original working directory is restored after every item.
sub _find_opt {
    my $wanted = shift;
    die "invalid top directory" unless defined $_[0];

    # This function must local()ize everything because callbacks may
    # call find() or finddepth()

    local %SLnkSeen;
    local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
        $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
        $pre_process, $post_process, $dangling_symlinks);
    local($dir, $name, $fullname, $prune);
    local *_ = \my $a;

    my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
    if ($Is_VMS) {
        # VMS returns this by default in VMS format which just doesn't
        # work for the rest of this module.
        $cwd = VMS::Filespec::unixpath($cwd);

        # Apparently this is not expected to have a trailing space.
        # To attempt to make VMS/UNIX conversions mostly reversible,
        # a trailing slash is needed.  The run-time functions ignore the
        # resulting double slash, but it causes the perl tests to fail.
        $cwd =~ s#/\z##;

        # This comes up in upper case now, but should be lower.
        # In the future this could be exact case, no need to change.
    }
    my $cwd_untainted = $cwd;
    my $check_t_cwd = 1;
    $wanted_callback = $wanted->{wanted};
    $bydepth = $wanted->{bydepth};
    $pre_process = $wanted->{preprocess};
    $post_process = $wanted->{postprocess};
    $no_chdir = $wanted->{no_chdir};
    # Symlink following is forced off on Win32.
    $full_check = $Is_Win32 ? 0 : $wanted->{follow};
    $follow = $Is_Win32 ? 0 : $full_check || $wanted->{follow_fast};
    $follow_skip = $wanted->{follow_skip};
    $untaint = $wanted->{untaint};
    $untaint_pat = $wanted->{untaint_pattern};
    $untaint_skip = $wanted->{untaint_skip};
    $dangling_symlinks = $wanted->{dangling_symlinks};

    # for compatibility reasons (find.pl, find2perl)
    local our ($topdir, $topdev, $topino, $topmode, $topnlink);

    # a symbolic link to a directory doesn't increase the link count
    $avoid_nlink = $follow || $File::Find::dont_use_nlink;

    my ($abs_dir, $Is_Dir);

    Proc_Top_Item:
    foreach my $TOP (@_) {
        # Work on a copy so the caller's argument is not modified.
        my $top_item = $TOP;

        ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;

        # Strip one trailing separator, but leave a bare root alone.
        if ($Is_Win32) {
            $top_item =~ s|[/\\]\z|| unless $top_item =~ m{^(?:\w:)?[/\\]$};
        }
        else {
            $top_item =~ s|/\z|| unless $top_item eq '/';
        }

        $Is_Dir= 0;

        if ($follow) {
            if (substr($top_item,0,1) eq '/') {
                $abs_dir = $top_item;
            }
            elsif ($top_item eq $File::Find::current_dir) {
                $abs_dir = $cwd;
            }
            else { # care about any ../
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                $abs_dir = contract_name("$cwd/",$top_item);
            }
            $abs_dir= Follow_SymLink($abs_dir);
            unless (defined $abs_dir) {
                if ($dangling_symlinks) {
                    if (ref $dangling_symlinks eq 'CODE') {
                        $dangling_symlinks->($top_item, $cwd);
                    } else {
                        warnings::warnif "$top_item is a dangling symbolic link\n";
                    }
                }
                next Proc_Top_Item;
            }

            # "_" still holds the last lstat() done by Follow_SymLink().
            if (-d _) {
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                _find_dir_symlnk($wanted, $abs_dir, $top_item);
                $Is_Dir= 1;
            }
        }
        else { # no follow
            $topdir = $top_item;
            unless (defined $topnlink) {
                warnings::warnif "Can't stat $top_item: $!\n";
                next Proc_Top_Item;
            }
            if (-d _) {
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                _find_dir($wanted, $top_item, $topnlink);
                $Is_Dir= 1;
            }
            else {
                $abs_dir= $top_item;
            }
        }

        # Top-level item is not a directory: report it directly.
        unless ($Is_Dir) {
            unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
                ($dir,$_) = ('./', $top_item);
            }

            $abs_dir = $dir;
            if (( $untaint ) && (is_tainted($dir) )) {
                ( $abs_dir ) = $dir =~ m|$untaint_pat|;
                unless (defined $abs_dir) {
                    if ($untaint_skip == 0) {
                        die "directory $dir is still tainted";
                    }
                    else {
                        next Proc_Top_Item;
                    }
                }
            }

            unless ($no_chdir || chdir $abs_dir) {
                warnings::warnif "Couldn't chdir $abs_dir: $!\n";
                next Proc_Top_Item;
            }

            $name = $abs_dir . $_; # $File::Find::name
            $_ = $name if $no_chdir;

            { $wanted_callback->() }; # protect against wild "next"
        }

        # Return to the starting directory before the next top item.
        unless ( $no_chdir ) {
            if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
                ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
                unless (defined $cwd_untainted) {
                    die "insecure cwd in find(depth)";
                }
                $check_t_cwd = 0;
            }
            unless (chdir $cwd_untainted) {
                die "Can't cd to $cwd: $!\n";
            }
        }
    }
}

# API:
#  $wanted
#  $p_dir :  "parent directory"
#  $nlink :  what came back from the stat
# preconditions:
#  chdir (if not no_chdir) to dir
#
# Walks the tree below $p_dir without following symbolic links, using an
# explicit stack (@Stack) of [chdir level, parent dir, relative name,
# nlink] entries instead of recursion.  An nlink of -1 marks a pending
# finddepth report and -2 a pending postprocess call.
sub _find_dir($$$) {
    my ($wanted, $p_dir, $nlink) = @_;
    my ($CdLvl,$Level) = (0,0);
    my @Stack;
    my @filenames;
    my ($subcount,$sub_nlink);
    my $SE= [];
    my $dir_name= $p_dir;
    my $dir_pref;
    my $dir_rel = $File::Find::current_dir;
    my $tainted = 0;
    my $no_nlink;

    if ($Is_Win32) {
        $dir_pref = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" );
    } elsif ($Is_VMS) {

        # VMS is returning trailing .dir on directories
        # and trailing . on files and symbolic links
        # in UNIX syntax.
        #

        $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';

        $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
    }
    else {
        $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
    }

    local ($dir, $name, $prune, *DIR);

    unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
        my $udir = $p_dir;
        if (( $untaint ) && (is_tainted($p_dir) )) {
            ( $udir ) = $p_dir =~ m|$untaint_pat|;
            unless (defined $udir) {
                if ($untaint_skip == 0) {
                    die "directory $p_dir is still tainted";
                }
                else {
                    return;
                }
            }
        }
        unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
            warnings::warnif "Can't cd to $udir: $!\n";
            return;
        }
    }

    # push the starting directory
    push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;

    while (defined $SE) {
        unless ($bydepth) {
            $dir= $p_dir; # $File::Find::dir
            $name= $dir_name; # $File::Find::name
            $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
            # prune may happen here
            $prune= 0;
            { $wanted_callback->() }; # protect against wild "next"
            next if $prune;
        }

        # change to that directory
        unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
            my $udir= $dir_rel;
            if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
                ( $udir ) = $dir_rel =~ m|$untaint_pat|;
                unless (defined $udir) {
                    if ($untaint_skip == 0) {
                        die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
                    } else { # $untaint_skip == 1
                        next;
                    }
                }
            }
            unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
                warnings::warnif "Can't cd to (" . ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
                next;
            }
            $CdLvl++;
        }

        $dir= $dir_name; # $File::Find::dir

        # Get the list of files in the current directory.
        unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
            warnings::warnif "Can't opendir($dir_name): $!\n";
            next;
        }
        @filenames = readdir DIR;
        closedir(DIR);
        @filenames = $pre_process->(@filenames) if $pre_process;
        push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;

        # default: use whatever was specified
        # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
        $no_nlink = $avoid_nlink;
        # if dir has wrong nlink count, force switch to slower stat method
        $no_nlink = 1 if ($nlink < 2);

        if ($nlink == 2 && !$no_nlink) {
            # This dir has no subdirectories.
            for my $FN (@filenames) {
                if ($Is_VMS) {
                    # Big hammer here - Compensate for VMS trailing . and .dir
                    # No win situation until this is changed, but this
                    # will handle the majority of the cases with breaking the fewest
                    $FN =~ s/\.dir\z//i;
                    $FN =~ s#\.$## if ($FN ne '.');
                }
                next if $FN =~ $File::Find::skip_pattern;

                $name = $dir_pref . $FN; # $File::Find::name
                $_ = ($no_chdir ? $name : $FN); # $_
                { $wanted_callback->() }; # protect against wild "next"
            }

        }
        else {
            # This dir has subdirectories.
            $subcount = $nlink - 2;

            # HACK: insert directories at this position. so as to preserve
            # the user pre-processed ordering of files.
            # EG: directory traversal is in user sorted order, not at random.
            my $stack_top = @Stack;

            for my $FN (@filenames) {
                next if $FN =~ $File::Find::skip_pattern;
                if ($subcount > 0 || $no_nlink) {
                    # Seen all the subdirs?
                    # check for directoriness.
                    # stat is faster for a file in the current directory
                    $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];

                    if (-d _) {
                        --$subcount;
                        $FN =~ s/\.dir\z//i if $Is_VMS;
                        # HACK: replace push to preserve dir traversal order
                        #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
                        splice @Stack, $stack_top, 0, [$CdLvl,$dir_name,$FN,$sub_nlink];
                    }
                    else {
                        $name = $dir_pref . $FN; # $File::Find::name
                        $_= ($no_chdir ? $name : $FN); # $_
                        { $wanted_callback->() }; # protect against wild "next"
                    }
                }
                else {
                    $name = $dir_pref . $FN; # $File::Find::name
                    $_= ($no_chdir ? $name : $FN); # $_
                    { $wanted_callback->() }; # protect against wild "next"
                }
            }
        }
    }
    continue {
        # Pop entries until one is a real directory to descend into;
        # marker entries (negative nlink) are handled right here.
        while ( defined ($SE = pop @Stack) ) {
            ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
            if ($CdLvl > $Level && !$no_chdir) {
                my $tmp;
                if ($Is_VMS) {
                    $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
                }
                else {
                    $tmp = join('/',('..') x ($CdLvl-$Level));
                }
                die "Can't cd to $tmp from $dir_name" unless chdir ($tmp);
                $CdLvl = $Level;
            }

            if ($Is_Win32) {
                $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? "$p_dir$dir_rel" : "$p_dir/$dir_rel");
                $dir_pref = "$dir_name/";
            }
            # NOTE(review): tests $^O directly where the rest of this sub
            # uses $Is_VMS; the two are set together at the end of the file.
            elsif ($^O eq 'VMS') {
                if ($p_dir =~ m/[\]>]+$/) {
                    $dir_name = $p_dir;
                    $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
                    $dir_pref = $dir_name;
                }
                else {
                    $dir_name = "$p_dir/$dir_rel";
                    $dir_pref = "$dir_name/";
                }
            }
            else {
                $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
                $dir_pref = "$dir_name/";
            }

            if ( $nlink == -2 ) {
                $name = $dir = $p_dir; # $File::Find::name / dir
                $_ = $File::Find::current_dir;
                $post_process->(); # End-of-directory processing
            }
            elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
                $name = $dir_name;
                if ( substr($name,-2) eq '/.' ) {
                    substr($name, length($name) == 2 ? -1 : -2) = '';
                }
                $dir = $p_dir;
                $_ = ($no_chdir ? $dir_name : $dir_rel );
                if ( substr($_,-2) eq '/.' ) {
                    substr($_, length($_) == 2 ? -1 : -2) = '';
                }
                { $wanted_callback->() }; # protect against wild "next"
            }
            else {
                push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
                last;
            }
        }
    }
}

# API:
#  $wanted
#  $dir_loc : absolute location of a dir
#  $p_dir   : "parent directory"
# preconditions:
#  chdir (if not no_chdir) to dir
#
# Symlink-following counterpart of _find_dir().  Stack entries are
# [absolute location, untainted parent location, parent dir, relative
# name, flag]; a negative flag marks a pending finddepth report.
sub _find_dir_symlnk($$$) {
    my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
    my @Stack;
    my @filenames;
    my $new_loc;
    my $updir_loc = $dir_loc; # untainted parent directory
    my $SE = [];
    my $dir_name = $p_dir;
    my $dir_pref;
    my $loc_pref;
    my $dir_rel = $File::Find::current_dir;
    my $byd_flag; # flag for pending stack entry if $bydepth
    my $tainted = 0;
    my $ok = 1;

    $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
    $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );

    local ($dir, $name, $fullname, $prune, *DIR);

    unless ($no_chdir) {
        # untaint the topdir
        if (( $untaint ) && (is_tainted($dir_loc) )) {
            ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
            # once untainted, $updir_loc is pushed on the stack (as parent directory);
            # hence, we don't need to untaint the parent directory every time we chdir
            # to it later
            unless (defined $updir_loc) {
                if ($untaint_skip == 0) {
                    die "directory $dir_loc is still tainted";
                }
                else {
                    return;
                }
            }
        }
        $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
        unless ($ok) {
            warnings::warnif "Can't cd to $updir_loc: $!\n";
            return;
        }
    }

    push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;

    while (defined $SE) {

        unless ($bydepth) {
            # change (back) to parent directory (always untainted)
            unless ($no_chdir) {
                unless (chdir $updir_loc) {
                    warnings::warnif "Can't cd to $updir_loc: $!\n";
                    next;
                }
            }
            $dir= $p_dir; # $File::Find::dir
            $name= $dir_name; # $File::Find::name
            $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
            $fullname= $dir_loc; # $File::Find::fullname
            # prune may happen here
            $prune= 0;
            lstat($_); # make sure file tests with '_' work
            { $wanted_callback->() }; # protect against wild "next"
            next if $prune;
        }

        # change to that directory
        unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
            $updir_loc = $dir_loc;
            if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
                # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
                ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
                unless (defined $updir_loc) {
                    if ($untaint_skip == 0) {
                        die "directory $dir_loc is still tainted";
                    }
                    else {
                        next;
                    }
                }
            }
            unless (chdir $updir_loc) {
                warnings::warnif "Can't cd to $updir_loc: $!\n";
                next;
            }
        }

        $dir = $dir_name; # $File::Find::dir

        # Get the list of files in the current directory.
        unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
            warnings::warnif "Can't opendir($dir_loc): $!\n";
            next;
        }
        @filenames = readdir DIR;
        closedir(DIR);

        for my $FN (@filenames) {
            if ($Is_VMS) {
                # Big hammer here - Compensate for VMS trailing . and .dir
                # No win situation until this is changed, but this
                # will handle the majority of the cases with breaking the fewest.
                $FN =~ s/\.dir\z//i;
                $FN =~ s#\.$## if ($FN ne '.');
            }
            next if $FN =~ $File::Find::skip_pattern;

            # follow symbolic links / do an lstat
            $new_loc = Follow_SymLink($loc_pref.$FN);

            # ignore if invalid symlink
            unless (defined $new_loc) {
                if (!defined -l _ && $dangling_symlinks) {
                    if (ref $dangling_symlinks eq 'CODE') {
                        $dangling_symlinks->($FN, $dir_pref);
                    } else {
                        warnings::warnif "$dir_pref$FN is a dangling symbolic link\n";
                    }
                }

                # The callback still sees the entry, with fullname undefined.
                $fullname = undef;
                $name = $dir_pref . $FN;
                $_ = ($no_chdir ? $name : $FN);
                { $wanted_callback->() };
                next;
            }

            if (-d _) {
                if ($Is_VMS) {
                    $FN =~ s/\.dir\z//i;
                    $FN =~ s#\.$## if ($FN ne '.');
                    $new_loc =~ s/\.dir\z//i;
                    $new_loc =~ s#\.$## if ($new_loc ne '.');
                }
                push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
            }
            else {
                $fullname = $new_loc; # $File::Find::fullname
                $name = $dir_pref . $FN; # $File::Find::name
                $_ = ($no_chdir ? $name : $FN); # $_
                { $wanted_callback->() }; # protect against wild "next"
            }
        }

    }
    continue {
        while (defined($SE = pop @Stack)) {
            ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
            $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
            $dir_pref = "$dir_name/";
            $loc_pref = "$dir_loc/";
            if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
                unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
                    unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
                        warnings::warnif "Can't cd to $updir_loc: $!\n";
                        next;
                    }
                }
                $fullname = $dir_loc; # $File::Find::fullname
                $name = $dir_name; # $File::Find::name
                if ( substr($name,-2) eq '/.' ) {
                    substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
                }
                $dir = $p_dir; # $File::Find::dir
                $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
                if ( substr($_,-2) eq '/.' ) {
                    substr($_, length($_) == 2 ? -1 : -2) = '';
                }

                lstat($_); # make sure file tests with '_' work
                { $wanted_callback->() }; # protect against wild "next"
            }
            else {
                push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
                last;
            }
        }
    }
}

# wrap_wanted($wanted)
#
# Normalize the first argument of find()/finddepth() into an option
# hash.  A hash reference must carry a CODE ref under "wanted" and gets
# defaults for follow_skip, untaint_pattern and untaint_skip filled in
# (the caller's hash is modified in place); a bare CODE ref is wrapped
# as { wanted => $code }.  Anything else dies.
sub wrap_wanted {
    my $wanted = shift;
    if ( ref($wanted) eq 'HASH' ) {
        unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
            die 'no &wanted subroutine given';
        }
        if ( $wanted->{follow} || $wanted->{follow_fast}) {
            $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
        }
        if ( $wanted->{untaint} ) {
            $wanted->{untaint_pattern} = $File::Find::untaint_pattern
                unless defined $wanted->{untaint_pattern};
            $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
        }
        return $wanted;
    }
    elsif( ref( $wanted ) eq 'CODE' ) {
        return { wanted => $wanted };
    }
    else {
        die 'no &wanted subroutine given';
    }
}

# find($wanted, @directories): traverse, reporting a directory before
# its contents.
sub find {
    my $wanted = shift;
    _find_opt(wrap_wanted($wanted), @_);
}

# finddepth($wanted, @directories): same as find() with bydepth forced
# on in the option hash.
sub finddepth {
    my $wanted = wrap_wanted(shift);
    $wanted->{bydepth} = 1;
    _find_opt($wanted, @_);
}

# default
$File::Find::skip_pattern = qr/^\.{1,2}\z/;
$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;

# These are hard-coded for now, but may move to hint files.
if ($^O eq 'VMS') {
    $Is_VMS = 1;
    $File::Find::dont_use_nlink = 1;
}
elsif ($^O eq 'MSWin32') {
    $Is_Win32 = 1;
}

# this _should_ work properly on all platforms
# where File::Find can be expected to work
$File::Find::current_dir = File::Spec->curdir || '.';

$File::Find::dont_use_nlink = 1
    if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $Is_Win32 ||
       $^O eq 'interix' || $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'qnx' ||
       $^O eq 'nto';

# Set dont_use_nlink in your hint file if your system's stat doesn't
# report the number of links in a directory as an indication
# of the number of files.
# See, e.g.
# hints/machten.sh for MachTen 2.2.
unless ($File::Find::dont_use_nlink) {
    require Config;
    $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
}

# We need a function that checks if a scalar is tainted. Either use the
# Scalar::Util module's tainted() function or our (slower) pure Perl
# fallback is_tainted_pp()
{
    local $@;
    eval { require Scalar::Util };
    *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
}

1;
HomeDir/Test.pm000064400000005703147634421560007371 0ustar00
package File::HomeDir::Test;

use 5.00503;
use strict;
use Carp ();
use File::Spec ();
use File::Temp ();
use File::HomeDir::Driver ();

use vars qw{$VERSION @ISA %DIR $ENABLED};
BEGIN {
    $VERSION = '1.00';
    @ISA = 'File::HomeDir::Driver';
    %DIR = ();
    $ENABLED = 0;
}

# Special magic use in test scripts
#
# Builds a temporary directory tree (removed at exit via CLEANUP), points
# $ENV{HOME} at it and registers this class as the File::HomeDir driver.
# Dies if it is imported a second time.
sub import {
    my $class = shift;
    die "Attempted to initialise File::HomeDir::Test trice" if %DIR;

    # Fill the test directories
    my $BASE = File::Temp::tempdir( CLEANUP => 1 );
    %DIR = map { $_ => File::Spec->catdir( $BASE, $_ ) } qw{
        my_home my_desktop my_documents my_data my_music my_pictures my_videos
    };

    # Hijack HOME to the home directory
    $ENV{HOME} = $DIR{my_home};

    # Make File::HomeDir load us instead of the native driver
    $File::HomeDir::IMPLEMENTED_BY = # Prevent a warning
    $File::HomeDir::IMPLEMENTED_BY = 'File::HomeDir::Test';

    # Ready to go
    $ENABLED = 1;
}

#####################################################################
# Current User Methods
#
# Each method creates its directory on first use and returns the path.

sub my_home {
    mkdir($DIR{my_home}, 0755) unless -d $DIR{my_home};
    return $DIR{my_home};
}

sub my_desktop {
    mkdir($DIR{my_desktop}, 0755) unless -d $DIR{my_desktop};
    return $DIR{my_desktop};
}

sub my_documents {
    # Test for a directory (-d) like every sibling method.  The previous
    # -f test was never true for the directory, so mkdir() was retried on
    # every call and left EEXIST in $!.
    mkdir($DIR{my_documents}, 0755) unless -d $DIR{my_documents};
    return $DIR{my_documents};
}

sub my_data {
    mkdir($DIR{my_data}, 0755) unless -d $DIR{my_data};
    return $DIR{my_data};
}

sub my_music {
    mkdir($DIR{my_music}, 0755) unless -d $DIR{my_music};
    return $DIR{my_music};
}

sub my_pictures {
    mkdir($DIR{my_pictures}, 0755) unless -d $DIR{my_pictures};
    return $DIR{my_pictures};
}

sub my_videos {
    mkdir($DIR{my_videos}, 0755) unless -d $DIR{my_videos};
    return $DIR{my_videos};
}

# The test driver knows nothing about other users.
sub users_home {
    return undef;
}

1;

__END__

=pod

=head1 NAME

File::HomeDir::Test - Prevent the accidental creation of user-owned files during testing

=head1 SYNOPSIS

  use Test::More tests => 1;
  use File::HomeDir::Test;
  use File::HomeDir;

=head1 DESCRIPTION

B<File::HomeDir::Test> is a L<File::HomeDir> driver intended for use in the test
scripts of modules or applications that write files into user-owned
directories.

It is designed to prevent the pollution of user directories with files
that are not part of the application install itself, but were created
during testing. These files can leak state information from the tests
into the run-time usage of an application, and on Unix systems also
prevents tests (which may be executed as root via sudo) from writing
files which cannot later be modified or removed by the regular user.

=head1 SUPPORT

See the support section of the main L<File::HomeDir> documentation.

=head1 AUTHOR

Adam Kennedy E<lt>adamk@cpan.orgE<gt>

=head1 COPYRIGHT

Copyright 2005 - 2011 Adam Kennedy.

This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.

=cut
HomeDir/Unix.pm000064400000006525147634421560007400 0ustar00
package File::HomeDir::Unix;

# See POD at the end of the file for documentation

use 5.00503;
use strict;
use Carp ();
use File::HomeDir::Driver ();

use vars qw{$VERSION @ISA};
BEGIN {
    $VERSION = '1.00';
    @ISA = 'File::HomeDir::Driver';
}

#####################################################################
# Current User Methods

# Returns the current user's home directory, or undef when the
# candidate found by _my_home() does not exist as a directory.
sub my_home {
    my $class = shift;
    my $home = $class->_my_home(@_);

    # On Unix in general, a non-existant home means "no home"
    # For example, "nobody"-like users might use /nonexistant
    if ( defined $home and ! -d $home ) {
        $home = undef;
    }

    return $home;
}

# Finds a home directory candidate: $ENV{HOME} if defined, then a true
# $ENV{LOGDIR}, then the passwd entry for the real uid.
sub _my_home {
    my $class = shift;
    if ( exists $ENV{HOME} and defined $ENV{HOME} ) {
        return $ENV{HOME};
    }

    # This is from the original code, but I'm guessing
    # it means "login directory" and exists on some Unixes.
    if ( exists $ENV{LOGDIR} and $ENV{LOGDIR} ) {
        return $ENV{LOGDIR};
    }

    ### More-desperate methods

    # Light desperation on any (Unixish) platform
    SCOPE: {
        my $home = (getpwuid($<))[7];
        return $home if $home and -d $home;
    }

    return undef;
}

# On unix by default, everything is under the same folder
sub my_desktop {
    shift->my_home;
}

sub my_documents {
    shift->my_home;
}

sub my_data {
    shift->my_home;
}

sub my_music {
    shift->my_home;
}

sub my_pictures {
    shift->my_home;
}

sub my_videos {
    shift->my_home;
}

#####################################################################
# General User Methods

# Returns the home directory of the named user, or undef when the user
# is unknown or the directory does not exist.
sub users_home {
    my ($class, $name) = @_;

    # Without a user name there is nothing to look up; returning here
    # avoids comparing and passing on an undefined value below.
    return undef unless defined $name;

    # IF and only if we have getpwuid support, and the
    # name of the user is our own, shortcut to my_home.
    # This is needed to handle HOME environment settings.
    my $own_name = getpwuid($<);
    if ( defined $own_name and $name eq $own_name ) {
        return $class->my_home;
    }

    SCOPE: {
        my $home = (getpwnam($name))[7];
        return $home if $home and -d $home;
    }

    return undef;
}

sub users_desktop {
    shift->users_home(@_);
}

sub users_documents {
    shift->users_home(@_);
}

sub users_data {
    shift->users_home(@_);
}

sub users_music {
    shift->users_home(@_);
}

sub users_pictures {
    shift->users_home(@_);
}

sub users_videos {
    shift->users_home(@_);
}

1;

=pod

=head1 NAME

File::HomeDir::Unix - Find your home and other directories on legacy Unix

=head1 SYNOPSIS

  use File::HomeDir;

  # Find directories for the current user
  $home    = File::HomeDir->my_home;      # /home/mylogin
  $desktop = File::HomeDir->my_desktop;   # All of these will...
  $docs    = File::HomeDir->my_documents; # ...default to home...
$music = File::HomeDir->my_music; # ...directory $pics = File::HomeDir->my_pictures; # $videos = File::HomeDir->my_videos; # $data = File::HomeDir->my_data; # =head1 DESCRIPTION This module provides implementations for determining common user directories. In normal usage this module will always be used via L. =head1 SUPPORT See the support section the main L module. =head1 AUTHORS Adam Kennedy Eadamk@cpan.orgE Sean M. Burke Esburke@cpan.orgE =head1 SEE ALSO L, L (legacy) =head1 COPYRIGHT Copyright 2005 - 2011 Adam Kennedy. Some parts copyright 2000 Sean M. Burke. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The full text of the license can be found in the LICENSE file included with this module. =cut HomeDir/Driver.pm000064400000002142147634421560007677 0ustar00package File::HomeDir::Driver; # Abstract base class that provides no functionality, # but confirms the class is a File::HomeDir driver class. use 5.00503; use strict; use Carp (); use vars qw{$VERSION}; BEGIN { $VERSION = '1.00'; } sub my_home { Carp::croak("$_[0] does not implement compulsory method $_[1]"); } 1; =pod =head1 NAME File::HomeDir::Driver - Base class for all File::HomeDir drivers =head1 DESCRIPTION This module is the base class for all L drivers, and must be inherited from to identify a class as a driver. It is primarily provided as a convenience for this specific identification purpose, as L supports the specification of custom drivers and an C<-Eisa> check is used during the loading of the driver. =head1 AUTHOR Adam Kennedy Eadamk@cpan.orgE =head1 SEE ALSO L =head1 COPYRIGHT Copyright 2009 - 2011 Adam Kennedy. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The full text of the license can be found in the LICENSE file included with this module. 
=cut HomeDir/Darwin.pm000064400000006275147634421560007703 0ustar00package File::HomeDir::Darwin; use 5.00503; use strict; use Cwd (); use Carp (); use File::HomeDir::Unix (); use vars qw{$VERSION @ISA}; BEGIN { $VERSION = '1.00'; @ISA = 'File::HomeDir::Unix'; } ##################################################################### # Current User Methods sub my_home { my $class = shift; if ( exists $ENV{HOME} and defined $ENV{HOME} ) { return $ENV{HOME}; } my $home = (getpwuid($<))[7]; return $home if $home && -d $home; return undef; } sub _my_home { my($class, $path) = @_; my $home = $class->my_home; return undef unless defined $home; my $folder = "$home/$path"; unless ( -d $folder ) { # Make sure that symlinks resolve to directories. return undef unless -l $folder; my $dir = readlink $folder or return; return undef unless -d $dir; } return Cwd::abs_path($folder); } sub my_desktop { my $class = shift; $class->_my_home('Desktop'); } sub my_documents { my $class = shift; $class->_my_home('Documents'); } sub my_data { my $class = shift; $class->_my_home('Library/Application Support'); } sub my_music { my $class = shift; $class->_my_home('Music'); } sub my_pictures { my $class = shift; $class->_my_home('Pictures'); } sub my_videos { my $class = shift; $class->_my_home('Movies'); } ##################################################################### # Arbitrary User Methods sub users_home { my $class = shift; my $home = $class->SUPER::users_home(@_); return defined $home ? Cwd::abs_path($home) : undef; } sub users_desktop { my ($class, $name) = @_; return undef if $name eq 'root'; $class->_to_user( $class->my_desktop, $name ); } sub users_documents { my ($class, $name) = @_; return undef if $name eq 'root'; $class->_to_user( $class->my_documents, $name ); } sub users_data { my ($class, $name) = @_; $class->_to_user( $class->my_data, $name ) || $class->users_home($name); } # cheap hack ... not entirely reliable, perhaps, but ... 
c'est la vie, since # there's really no other good way to do it at this time, that i know of -- pudge sub _to_user { my ($class, $path, $name) = @_; my $my_home = $class->my_home; my $users_home = $class->users_home($name); defined $users_home or return undef; $path =~ s/^\Q$my_home/$users_home/; return $path; } 1; =pod =head1 NAME File::HomeDir::Darwin - Find your home and other directories on Darwin (OS X) =head1 DESCRIPTION This module provides Mac OS X specific file path for determining common user directories in pure perl, by just using C<$ENV{HOME}> without Carbon nor Cocoa API calls. In normal usage this module will always be used via L. =head1 SYNOPSIS use File::HomeDir; # Find directories for the current user $home = File::HomeDir->my_home; # /Users/mylogin $desktop = File::HomeDir->my_desktop; # /Users/mylogin/Desktop $docs = File::HomeDir->my_documents; # /Users/mylogin/Documents $music = File::HomeDir->my_music; # /Users/mylogin/Music $pics = File::HomeDir->my_pictures; # /Users/mylogin/Pictures $videos = File::HomeDir->my_videos; # /Users/mylogin/Movies $data = File::HomeDir->my_data; # /Users/mylogin/Library/Application Support =cut HomeDir/Windows.pm000064400000014245147634421560010105 0ustar00package File::HomeDir::Windows; # See POD at the end of the file for documentation use 5.00503; use strict; use Carp (); use File::Spec (); use File::HomeDir::Driver (); use vars qw{$VERSION @ISA}; BEGIN { $VERSION = '1.00'; @ISA = 'File::HomeDir::Driver'; } sub CREATE () { 1 } ##################################################################### # Current User Methods sub my_home { my $class = shift; # A lot of unix people and unix-derived tools rely on # the ability to overload HOME. We will support it too # so that they can replace raw HOME calls with File::HomeDir. if ( exists $ENV{HOME} and $ENV{HOME} ) { return $ENV{HOME}; } # Do we have a user profile? 
# Locate the current user's desktop directory.
#
# Candidates are tried in order and the first one accepted by $class->_d
# is returned:
#   1. the folder reported by Win32::GetFolderPath for CSIDL_DESKTOP
#   2. a "Desktop" directory below $ENV{USERPROFILE}, then $ENV{WINDIR}
#   3. a short list of hard-wired legacy locations
# Returns undef when no candidate passes.
sub my_desktop {
    my $class = shift;

    # Ask the shell first; this is the most correct source.
    require Win32;
    my $shell_dir = Win32::GetFolderPath(Win32::CSIDL_DESKTOP(), CREATE);
    if ( $shell_dir and $class->_d($shell_dir) ) {
        return $shell_dir;
    }

    # MSWindows sets WINDIR, MS WinNT sets USERPROFILE.
    for my $var ( 'USERPROFILE', 'WINDIR' ) {
        my $base = $ENV{$var} or next;
        my $candidate = File::Spec->catdir($base, 'Desktop');
        if ( $candidate and $class->_d($candidate) ) {
            return $candidate;
        }
    }

    # Last resort: fixed paths. Both slash styles are kept because this
    # list was carried over from the original File::HomeDir::Win32.
    my @legacy = (
        "C:\\windows\\desktop",
        "C:\\win95\\desktop",
        "C:/win95/desktop",
        "C:/windows/desktop",
    );
    for my $legacy_dir ( @legacy ) {
        return $legacy_dir if $class->_d($legacy_dir);
    }

    return undef;
}
if ( $path =~ /\\\\/ ) { return 1; } # Otherwise do a stat as normal return -d $path; } 1; =pod =head1 NAME File::HomeDir::Windows - Find your home and other directories on Windows =head1 SYNOPSIS use File::HomeDir; # Find directories for the current user (eg. using Windows XP Professional) $home = File::HomeDir->my_home; # C:\Documents and Settings\mylogin $desktop = File::HomeDir->my_desktop; # C:\Documents and Settings\mylogin\Desktop $docs = File::HomeDir->my_documents; # C:\Documents and Settings\mylogin\My Documents $music = File::HomeDir->my_music; # C:\Documents and Settings\mylogin\My Documents\My Music $pics = File::HomeDir->my_pictures; # C:\Documents and Settings\mylogin\My Documents\My Pictures $videos = File::HomeDir->my_videos; # C:\Documents and Settings\mylogin\My Documents\My Video $data = File::HomeDir->my_data; # C:\Documents and Settings\mylogin\Local Settings\Application Data =head1 DESCRIPTION This module provides Windows-specific implementations for determining common user directories. In normal usage this module will always be used via L. Internally this module will use L::GetFolderPath to fetch the location of your directories. As a result of this, in certain unusual situations (usually found inside large organisations) the methods may return UNC paths such as C<\\cifs.local\home$>. If your application runs on Windows and you want to have it work comprehensively everywhere, you may need to implement your own handling for these paths as they can cause strange behaviour. For example, stat calls to UNC paths may work but block for several seconds, but opendir() may not be able to read any files (creating the appearance of an existing but empty directory). To avoid complicating the problem any further, in the rare situation that a UNC path is returned by C the usual -d validation checks will B be done. =head1 SUPPORT See the support section the main L module. =head1 AUTHORS Adam Kennedy Eadamk@cpan.orgE Sean M. 
# Find the current user's home directory on legacy Mac OS.
#
# Sources are tried in order:
#   1. $ENV{HOME}, returned as-is whenever it is defined
#   2. the desktop folder reported by $class->my_desktop
#   3. the home field of the getpwuid() entry for the current uid
# Croaks when none of them yields an existing directory.
sub my_home {
    my $class = shift;

    # Try for $ENV{HOME} if we have it
    if ( defined $ENV{HOME} ) {
        return $ENV{HOME};
    }

    ### DESPERATION SETS IN

    # We could use the desktop
    SCOPE: {
        local $@;
        # Only the lookup lives inside the eval: a "return" written inside
        # eval { } leaves the eval block, not this subroutine, so the
        # result has to be returned from out here.
        my $desktop = eval { $class->my_desktop };
        return $desktop if $desktop and -d $desktop;
    }

    # Desperation on any platform
    SCOPE: {
        # On some platforms getpwuid dies if called at all
        local $SIG{'__DIE__'} = '';
        my $home = (getpwuid($<))[7];
        return $home if $home and -d $home;
    }

    Carp::croak("Could not locate current user's home directory");
}
called at all local $SIG{'__DIE__'} = ''; my $home = (getpwnam($name))[7]; return $home if defined $home and -d $home; } Carp::croak("Failed to find home directory for user '$name'"); } 1; =pod =head1 NAME File::HomeDir::MacOS9 - Find your home and other directories on legacy Macs =head1 SYNOPSIS use File::HomeDir; # Find directories for the current user $home = File::HomeDir->my_home; $desktop = File::HomeDir->my_desktop; =head1 DESCRIPTION This module provides implementations for determining common user directories on legacy Mac hosts. In normal usage this module will always be used via L. This module is no longer actively maintained, and is included only for extreme back-compatibility. Only the C and C methods are supported. =head1 SUPPORT See the support section the main L module. =head1 AUTHORS Adam Kennedy Eadamk@cpan.orgE Sean M. Burke Esburke@cpan.orgE =head1 SEE ALSO L =head1 COPYRIGHT Copyright 2005 - 2011 Adam Kennedy. Some parts copyright 2000 Sean M. Burke. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The full text of the license can be found in the LICENSE file included with this module. =cut HomeDir/FreeDesktop.pm000064400000007063147634421560010666 0ustar00package File::HomeDir::FreeDesktop; # Specific functionality for unixes running free desktops # compatible with (but not using) File-BaseDir-0.03 # See POD at the end of the file for more documentation. use 5.00503; use strict; use Carp (); use File::Spec (); use File::Which (); use File::HomeDir::Unix (); use vars qw{$VERSION @ISA}; BEGIN { $VERSION = '1.00'; @ISA = 'File::HomeDir::Unix'; } # xdg uses $ENV{XDG_CONFIG_HOME}/user-dirs.dirs to know where are the # various "my xxx" directories. That is a shell file. The official API # is the xdg-user-dir executable. 
# Ask the xdg-user-dir program for one of the current user's directories.
#
# The second argument is the directory key passed to xdg-user-dir
# (the callers in this module use DESKTOP, DOCUMENTS, MUSIC, ...).
# Returns the line printed by the program with its newline removed, or
# undef when the program was not located or could not be run.
sub _my {
    my ($class, $key) = @_;

    # $xdgprog holds the result of File::Which::which(); without a
    # program, qx() would try to execute the bare key as a command.
    return undef unless defined $xdgprog and length $xdgprog;

    # No quoting because input is hard-coded and only comes from this module
    my $thingy = qx($xdgprog $key);

    # qx() gives undef when the command could not be started.
    return undef unless defined $thingy;

    chomp $thingy;
    return $thingy;
}
In normal usage this module will always be used via L. =head1 SYNOPSIS use File::HomeDir; # Find directories for the current user $home = File::HomeDir->my_home; # /home/mylogin $desktop = File::HomeDir->my_desktop; $docs = File::HomeDir->my_documents; $music = File::HomeDir->my_music; $pics = File::HomeDir->my_pictures; $videos = File::HomeDir->my_videos; $data = File::HomeDir->my_data; =head1 AUTHORS Jerome Quelin Ejquellin@cpan.org Adam Kennedy Eadamk@cpan.orgE =head1 SEE ALSO L, L (legacy) =head1 COPYRIGHT Copyright 2009 - 2011 Jerome Quelin. Some parts copyright 2010 Adam Kennedy. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The full text of the license can be found in the LICENSE file included with this module. =cut HomeDir/Darwin/Carbon.pm000064400000010767147634421560011110 0ustar00package File::HomeDir::Darwin::Carbon; # Basic implementation for the Dawin family of operating systems. # This includes (most prominently) Mac OS X. use 5.00503; use strict; use Cwd (); use Carp (); use File::HomeDir::Darwin (); use vars qw{$VERSION @ISA}; BEGIN { $VERSION = '1.00'; # This is only a child class of the pure Perl darwin # class so that we can do homedir detection of all three # drivers at one via ->isa. @ISA = 'File::HomeDir::Darwin'; # Load early if in a forking environment and we have # prefork, or at run-time if not. local $@; eval "use prefork 'Mac::Files'"; } ##################################################################### # Current User Methods sub my_home { my $class = shift; # A lot of unix people and unix-derived tools rely on # the ability to overload HOME. We will support it too # so that they can replace raw HOME calls with File::HomeDir. 
# Guess another user's desktop by rewriting the current user's desktop
# path through $class->_to_user.
#
# Returns undef for the user 'root'; otherwise returns whatever
# _to_user produces for the given name.
sub users_desktop {
    my ($class, $name) = @_;
    return undef if $name eq 'root';

    # Fetch the desktop in scalar context. Called directly inside the
    # argument list, a my_desktop that returns an empty list would vanish
    # and shift $name into the path position.
    my $desktop = $class->my_desktop;

    return $class->_to_user( $desktop, $name );
}
# Translate one of the current user's paths into the matching path for
# another user by swapping the leading home directory.
#
# Arguments: the class (its my_home and users_home methods are consulted),
# the path to translate, and the name of the target user.
#
# Returns the rewritten path, or the path unchanged when it does not start
# with the current user's home. Returns undef when the target user's home,
# the current user's home, or the path itself is unavailable.
sub _to_user {
    my ($class, $path, $name) = @_;

    my $my_home    = $class->my_home;
    my $users_home = $class->users_home($name);
    defined $users_home or return undef;

    # Nothing to translate if the source path could not be determined.
    defined $path or return undef;

    # An undefined or empty home would reduce the pattern to a bare anchor,
    # which matches every string and glues the target home onto the front
    # of the untouched path.
    defined $my_home and length $my_home or return undef;

    $path =~ s/^\Q$my_home\E/$users_home/;
    return $path;
}
# Return the current user's home directory.
#
# A defined $ENV{HOME} always wins, so that unix-style tools which
# override HOME keep working. Otherwise the answer comes from
# Mac::SystemDirectory::HomeDirectory(), loaded on demand.
sub my_home {
    my ($class) = @_;

    my $env_home = exists $ENV{HOME} ? $ENV{HOME} : undef;
    return $env_home if defined $env_home;

    require Mac::SystemDirectory;
    return Mac::SystemDirectory::HomeDirectory();
}
return undef unless -l $folder; my $dir = readlink $folder or return; return undef unless -d $dir; } return Cwd::abs_path($folder); } 1; =pod =head1 NAME File::HomeDir::Darwin::Cocoa - Find your home and other directories on Darwin (OS X) =head1 DESCRIPTION This module provides Darwin-specific implementations for determining common user directories using Cocoa API through L. In normal usage this module will always be used via L. Theoretically, this should return the same paths as both of the other Darwin drivers. Because this module requires L, if the module is not installed, L will fall back to L. =head1 SYNOPSIS use File::HomeDir; # Find directories for the current user $home = File::HomeDir->my_home; # /Users/mylogin $desktop = File::HomeDir->my_desktop; # /Users/mylogin/Desktop $docs = File::HomeDir->my_documents; # /Users/mylogin/Documents $music = File::HomeDir->my_music; # /Users/mylogin/Music $pics = File::HomeDir->my_pictures; # /Users/mylogin/Pictures $videos = File::HomeDir->my_videos; # /Users/mylogin/Movies $data = File::HomeDir->my_data; # /Users/mylogin/Library/Application Support =cut Fetch.pm000064400000131774147634421560006164 0ustar00package File::Fetch; use strict; use FileHandle; use File::Temp; use File::Copy; use File::Spec; use File::Spec::Unix; use File::Basename qw[dirname]; use Cwd qw[cwd]; use Carp qw[carp]; use IPC::Cmd qw[can_run run QUOTE]; use File::Path qw[mkpath]; use File::Temp qw[tempdir]; use Params::Check qw[check]; use Module::Load::Conditional qw[can_load]; use Locale::Maketext::Simple Style => 'gettext'; use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT $BLACKLIST $METHOD_FAIL $VERSION $METHODS $FTP_PASSIVE $TIMEOUT $DEBUG $WARN ]; $VERSION = '0.42'; $VERSION = eval $VERSION; # avoid warnings with development releases $PREFER_BIN = 0; # XXX TODO implement $FROM_EMAIL = 'File-Fetch@example.com'; $USER_AGENT = "File::Fetch/$VERSION"; $BLACKLIST = [qw|ftp|]; $METHOD_FAIL = { }; $FTP_PASSIVE = 1; $TIMEOUT = 0; $DEBUG = 
0; $WARN = 1; ### methods available to fetch the file depending on the scheme $METHODS = { http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ], ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ], file => [ qw|lwp lftp file| ], rsync => [ qw|rsync| ], git => [ qw|git| ], }; ### silly warnings ### local $Params::Check::VERBOSE = 1; local $Params::Check::VERBOSE = 1; local $Module::Load::Conditional::VERBOSE = 0; local $Module::Load::Conditional::VERBOSE = 0; ### see what OS we are on, important for file:// uris ### use constant ON_WIN => ($^O eq 'MSWin32'); use constant ON_VMS => ($^O eq 'VMS'); use constant ON_UNIX => (!ON_WIN); use constant HAS_VOL => (ON_WIN); use constant HAS_SHARE => (ON_WIN); use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! ); =pod =head1 NAME File::Fetch - A generic file fetching mechanism =head1 SYNOPSIS use File::Fetch; ### build a File::Fetch object ### my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); ### fetch the uri to cwd() ### my $where = $ff->fetch() or die $ff->error; ### fetch the uri to /tmp ### my $where = $ff->fetch( to => '/tmp' ); ### parsed bits from the uri ### $ff->uri; $ff->scheme; $ff->host; $ff->path; $ff->file; =head1 DESCRIPTION File::Fetch is a generic file fetching mechanism. It allows you to fetch any file pointed to by a C, C, C, C or C uri by a number of different means. See the C section further down for details. =head1 ACCESSORS A C object has the following accessors =over 4 =item $ff->uri The uri you passed to the constructor =item $ff->scheme The scheme from the uri (like 'file', 'http', etc) =item $ff->host The hostname in the uri. Will be empty if host was originally 'localhost' for a 'file://' url. =item $ff->vol On operating systems with the concept of a volume the second element of a file:// is considered to the be volume specification for the file. Thus on Win32 this routine returns the volume, on other operating systems this returns nothing. 
On Windows this value may be empty if the uri is to a network share, in which case the 'share' property will be defined. Additionally, volume specifications that use '|' as ':' will be converted on read to use ':'. On VMS, which has a volume concept, this field will be empty because VMS file specifications are converted to absolute UNIX format and the volume information is transparently included. =item $ff->share On systems with the concept of a network share (currently only Windows) returns the sharename from a file://// url. On other operating systems returns empty. =item $ff->path The path from the uri, will be at least a single '/'. =item $ff->file The name of the remote file. For the local file name, the result of $ff->output_file will be used. =item $ff->file_default The name of the default local file, that $ff->output_file falls back to if it would otherwise return no filename. For example when fetching a URI like http://www.abc.net.au/ the contents retrieved may be from a remote file called 'index.html'. The default value of this attribute is literally 'file_default'. 
### Name of the local output file: the remote file name with everything
### from the first '?' onwards removed. When nothing usable is left, the
### object's file_default is returned instead.
sub output_file {
    my $self = shift;

    my $name = $self->file;
    $name =~ s/\?.*$//g;
    return $name if $name;

    ### nothing left of the name, use the configured fallback
    my $fallback = $self->file_default;
    return $fallback;
}
### Constructor. Takes key/value arguments:
###   uri          (required) the uri to fetch
###   file_default (optional) replaces the parsed default file name when true
###   tempdir_root (optional) made absolute; defaults to the absolute cwd
### The uri is split by _parse_uri() and the parts are handed to _create().
### Returns the new object, or false when validation, parsing or creation
### fails.
sub new {
    my ($class, %hash) = @_;

    my ($uri, $file_default, $tempdir_root);
    my $tmpl = {
        uri          => { required => 1, store => \$uri },
        file_default => { required => 0, store => \$file_default },
        tempdir_root => { required => 0, store => \$tempdir_root },
    };
    return unless check( $tmpl, \%hash );

    ### parse the uri to usable parts ###
    my $href = $class->_parse_uri( $uri );
    return unless $href;

    if ( $file_default ) {
        $href->{file_default} = $file_default;
    }

    ### an explicit tempdir root wins, the current directory is the fallback
    if ( $tempdir_root ) {
        $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root );
    }
    unless ( $href->{tempdir_root} ) {
        $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd() );
    }

    ### make it into a FFI object ###
    my $ff = $class->_create( %$href ) or return;

    return $ff;
}
### Split a uri into a hash reference of its parts.
###
### The result always carries 'uri' (the original string), 'scheme',
### 'host', 'path' and 'file'. For file:// uris it may also carry
### 'share' (when HAS_SHARE is true) or 'vol' (when HAS_VOL is true).
### Returns false when no uri is given or when the uri does not start
### with a 'scheme://' prefix.
sub _parse_uri {
    my $self = shift;
    my $uri  = shift or return;

    my $href = { uri => $uri };

    ### find the scheme ###
    ### $1 is only meaningful after a successful match; without this check
    ### a scheme-less uri would pick up whatever an earlier match left in
    ### $1 (or undef).
    $uri =~ s|^(\w+)://|| or return;
    $href->{scheme} = $1;

    ### See rfc 1738 section 3.10
    ### http://www.faqs.org/rfcs/rfc1738.html
    ### And wikipedia for more on windows file:// urls
    ### http://en.wikipedia.org/wiki/File://
    if( $href->{scheme} eq 'file' ) {

        my @parts = split '/',$uri;

        ### file://hostname/...
        ### file://hostname/...
        ### normalize file://localhost with file:///
        $href->{host} = $parts[0] || '';

        ### index in @parts where the path components begin;
        my $index = 1;

        ### file:////hostname/sharename/blah.txt
        if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {

            $href->{host}  = $parts[2] || '';   # avoid warnings
            $href->{share} = $parts[3] || '';   # avoid warnings

            $index = 4                          # index after the share

        ### file:///D|/blah.txt
        ### file:///D:/blah.txt
        } elsif (HAS_VOL) {

            ### this code comes from dmq's patch, but:
            ### XXX if volume is empty, wouldn't that be an error? --kane
            ### if so, our file://localhost test needs to be fixed as well
            $href->{vol} = $parts[1] || '';

            ### correct D| style volume descriptors
            $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;

            $index = 2;                         # index after the volume
        }

        ### rebuild the path from the leftover parts;
        $href->{path} = join '/', '', splice( @parts, $index, $#parts );

    } else {
        ### using anything but qw() in hash slices may produce warnings
        ### in older perls :-(
        @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
    }

    ### split the path into file + dir ###
    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
        $href->{path} = $parts[1];
        $href->{file} = $parts[2];
    }

    ### host will be empty if the target was 'localhost' and the
    ### scheme was 'file'
    $href->{host} = '' if ($href->{host} eq 'localhost') and
                          ($href->{scheme} eq 'file');

    return $href;
}
$to = VMS::Filespec::vmspath($to) if ON_VMS; ### create the path if it doesn't exist yet ### unless( -d $to ) { eval { mkpath( $to ) }; return $self->_error(loc("Could not create path '%1'",$to)) if $@; } } ### set passive ftp if required ### local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; ### we dont use catfile on win32 because if we are using a cygwin tool ### under cmd.exe they wont understand windows style separators. my $out_to = ON_WIN ? $to.'/'.$self->output_file : File::Spec->catfile( $to, $self->output_file ); for my $method ( @{ $METHODS->{$self->scheme} } ) { my $sub = '_'.$method.'_fetch'; unless( __PACKAGE__->can($sub) ) { $self->_error(loc("Cannot call method for '%1' -- WEIRD!", $method)); next; } ### method is blacklisted ### next if grep { lc $_ eq $method } @$BLACKLIST; ### method is known to fail ### next if $METHOD_FAIL->{$method}; ### there's serious issues with IPC::Run and quoting of command ### line arguments. using quotes in the wrong place breaks things, ### and in the case of say, ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" ### it doesn't matter how you quote, it always fails. local $IPC::Cmd::USE_IPC_RUN = 0; if( my $file = $self->$sub( to => $out_to )){ unless( -e $file && -s _ ) { $self->_error(loc("'%1' said it fetched '%2', ". "but it was not created",$method,$file)); ### mark the failure ### $METHOD_FAIL->{$method} = 1; next; } else { ### slurp mode? if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { ### open the file open my $fh, "<$file" or do { $self->_error( loc("Could not open '%1': %2", $file, $!)); return; }; ### slurp $$target = do { local $/; <$fh> }; } my $abs = File::Spec->rel2abs( $file ); return $abs; } } } ### if we got here, we looped over all methods, but we weren't able ### to fetch it. 
### Fetch the file with LWP::UserAgent->mirror.
###
### Takes 'to' (required): the local file to write.
### Returns the value of 'to' when the response code is 200 or 304,
### the result of _error() for any other response code, and nothing
### when the arguments are invalid, mirror() returns false, or the LWP
### modules cannot be loaded (the method is then marked in $METHOD_FAIL).
sub _lwp_fetch {
    my ($self, %hash) = @_;

    my $to;
    my $tmpl = {
        to => { required => 1, store => \$to }
    };
    return unless check( $tmpl, \%hash );

    ### modules required to download with lwp ###
    my %wanted = (
        LWP              => '0.0',
        'LWP::UserAgent' => '0.0',
        'HTTP::Request'  => '0.0',
        'HTTP::Status'   => '0.0',
        URI              => '0.0',
    );
    if ( not can_load( modules => \%wanted ) ) {
        $METHOD_FAIL->{'lwp'} = 1;
        return;
    }

    my $scheme  = $self->scheme;
    my $is_file = $scheme eq 'file';

    ### build the uri from path + file, then fill in the remaining parts
    my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
    my $uri    = URI->new( $remote );

    ### special rules apply for file:// uris: no host and no userinfo ###
    $uri->scheme( $scheme );
    $uri->host( $is_file ? '' : $self->host );
    $uri->userinfo("anonymous:$FROM_EMAIL") unless $is_file;

    ### set up the useragent object
    my $ua = LWP::UserAgent->new();
    $ua->timeout( $TIMEOUT ) if $TIMEOUT;
    $ua->agent( $USER_AGENT );
    $ua->from( $FROM_EMAIL );
    $ua->env_proxy;

    my $res = $ua->mirror($uri, $to);
    return unless $res;

    ### uptodate or fetched ok ###
    my $code = $res->code;
    return $to if $code == 304 or $code == 200;

    return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
                $code, HTTP::Status::status_message($code),
                $res->status_line));
}
HTTP response: %1 [%2]", $rc->{status}, $rc->{reason} ) ); } return $to; } ### HTTP::Lite fetching ### sub _httplite_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### modules required to download with lwp ### my $use_list = { 'HTTP::Lite' => '2.2', }; unless( can_load(modules => $use_list) ) { $METHOD_FAIL->{'httplite'} = 1; return; } my $uri = $self->uri; my $retries = 0; RETRIES: while ( $retries++ < 5 ) { my $http = HTTP::Lite->new(); # Naughty naughty but there isn't any accessor/setter $http->{timeout} = $TIMEOUT if $TIMEOUT; $http->http11_mode(1); my $fh = FileHandle->new; unless ( $fh->open($to,'>') ) { return $self->_error(loc( "Could not open '%1' for writing: %2",$to,$!)); } $fh->autoflush(1); binmode $fh; my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh ); close $fh; if ( $rc == 301 || $rc == 302 ) { my $loc; HEADERS: for ($http->headers_array) { /Location: (\S+)/ and $loc = $1, last HEADERS; } #$loc or last; # Think we should squeal here. if ($loc =~ m!^/!) { $uri =~ s{^(\w+?://[^/]+)/.*$}{$1}; $uri .= $loc; } else { $uri = $loc; } next RETRIES; } elsif ( $rc == 200 ) { return $to; } else { return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]", $rc, $http->status_message)); } } # Loop for 5 retries. return $self->_error("Fetch failed! Gave up after 5 tries"); } ### Simple IO::Socket::INET fetching ### sub _iosock_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; my $use_list = { 'IO::Socket::INET' => '0.0', 'IO::Select' => '0.0', }; unless( can_load(modules => $use_list) ) { $METHOD_FAIL->{'iosock'} = 1; return; } my $sock = IO::Socket::INET->new( PeerHost => $self->host, ( $self->host =~ /:/ ? 
() : ( PeerPort => 80 ) ), ); unless ( $sock ) { return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!)); } my $fh = FileHandle->new; # Check open() unless ( $fh->open($to,'>') ) { return $self->_error(loc( "Could not open '%1' for writing: %2",$to,$!)); } $fh->autoflush(1); binmode $fh; my $path = File::Spec::Unix->catfile( $self->path, $self->file ); my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a"; $sock->send( $req ); my $select = IO::Select->new( $sock ); my $resp = ''; my $normal = 0; while ( $select->can_read( $TIMEOUT || 60 ) ) { my $ret = $sock->sysread( $resp, 4096, length($resp) ); if ( !defined $ret or $ret == 0 ) { $select->remove( $sock ); $normal++; } } close $sock; unless ( $normal ) { return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 ))); } # Check the "response" # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1) $resp =~ s/^(\x0d?\x0a)+//; # Check it is an HTTP response unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) { return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host)); } # Check for OK my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i; unless ( $code eq '200' ) { return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host)); } { local $\; print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0]; } close $fh; return $to; } ### Net::FTP fetching sub _netftp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### required modules ### my $use_list = { 'Net::FTP' => 0 }; unless( can_load( modules => $use_list ) ) { $METHOD_FAIL->{'netftp'} = 1; return; } ### make connection ### my $ftp; my @options = ($self->host); push(@options, Timeout => $TIMEOUT) if $TIMEOUT; unless( $ftp = Net::FTP->new( @options ) ) { return $self->_error(loc("Ftp creation failed: %1",$@)); } ### login ### unless( $ftp->login( anonymous => 
$FROM_EMAIL ) ) { return $self->_error(loc("Could not login to '%1'",$self->host)); } ### set binary mode, just in case ### $ftp->binary; ### create the remote path ### remember remote paths are unix paths! [#11483] my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); ### fetch the file ### my $target; unless( $target = $ftp->get( $remote, $to ) ) { return $self->_error(loc("Could not fetch '%1' from '%2'", $remote, $self->host)); } ### log out ### $ftp->quit; return $target; } ### /bin/wget fetch ### sub _wget_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; my $wget; ### see if we have a wget binary ### unless( $wget = can_run('wget') ) { $METHOD_FAIL->{'wget'} = 1; return; } ### no verboseness, thanks ### my $cmd = [ $wget, '--quiet' ]; ### if a timeout is set, add it ### push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; ### run passive if specified ### push @$cmd, '--passive-ftp' if $FTP_PASSIVE; ### set the output document, add the uri ### push @$cmd, '--output-document', $to, $self->uri; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG )) { ### wget creates the output document always, even if the fetch ### fails.. 
so unlink it in that case 1 while unlink $to; return $self->_error(loc( "Command failed: %1", $captured || '' )); } return $to; } ### /bin/lftp fetch ### sub _lftp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a lftp binary ### my $lftp; unless( $lftp = can_run('lftp') ) { $METHOD_FAIL->{'lftp'} = 1; return; } ### no verboseness, thanks ### my $cmd = [ $lftp, '-f' ]; my $fh = File::Temp->new; my $str; ### if a timeout is set, add it ### $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT; ### run passive if specified ### $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE; ### set the output document, add the uri ### ### quote the URI, because lftp supports certain shell ### expansions, most notably & for backgrounding. ### ' quote does nto work, must be " $str .= q[get ']. $self->uri .q[' -o ]. $to . $/; if( $DEBUG ) { my $pp_str = join ' ', split $/, $str; print "# lftp command: $pp_str\n"; } ### write straight to the file. $fh->autoflush(1); print $fh $str; ### the command needs to be 1 string to be executed push @$cmd, $fh->filename; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG )) { ### wget creates the output document always, even if the fetch ### fails.. 
so unlink it in that case 1 while unlink $to; return $self->_error(loc( "Command failed: %1", $captured || '' )); } return $to; } ### /bin/ftp fetch ### sub _ftp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a ftp binary ### my $ftp; unless( $ftp = can_run('ftp') ) { $METHOD_FAIL->{'ftp'} = 1; return; } my $fh = FileHandle->new; local $SIG{CHLD} = 'IGNORE'; unless ($fh->open("$ftp -n", '|-')) { return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); } my @dialog = ( "lcd " . dirname($to), "open " . $self->host, "user anonymous $FROM_EMAIL", "cd /", "cd " . $self->path, "binary", "get " . $self->file . " " . $self->output_file, "quit", ); foreach (@dialog) { $fh->print($_, "\n") } $fh->close or return; return $to; } ### lynx is stupid - it decompresses any .gz file it finds to be text ### use /bin/lynx to fetch files sub _lynx_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a lynx binary ### my $lynx; unless ( $lynx = can_run('lynx') ){ $METHOD_FAIL->{'lynx'} = 1; return; } unless( IPC::Cmd->can_capture_buffer ) { $METHOD_FAIL->{'lynx'} = 1; return $self->_error(loc( "Can not capture buffers. 
Can not use '%1' to fetch files", 'lynx' )); } ### check if the HTTP resource exists ### if ($self->uri =~ /^https?:\/\//i) { my $cmd = [ $lynx, '-head', '-source', "-auth=anonymous:$FROM_EMAIL", ]; push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; push @$cmd, $self->uri; ### shell out ### my $head; unless(run( command => $cmd, buffer => \$head, verbose => $DEBUG ) ) { return $self->_error(loc("Command failed: %1", $head || '')); } unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) { return $self->_error(loc("Command failed: %1", $head || '')); } } ### write to the output file ourselves, since lynx ass_u_mes to much my $local = FileHandle->new( $to, 'w' ) or return $self->_error(loc( "Could not open '%1' for writing: %2",$to,$!)); ### dump to stdout ### my $cmd = [ $lynx, '-source', "-auth=anonymous:$FROM_EMAIL", ]; push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; ### DO NOT quote things for IPC::Run, it breaks stuff. push @$cmd, $self->uri; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? $self->uri # : QUOTE. $self->uri .QUOTE; ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command failed: %1", $captured || '')); } ### print to local file ### ### XXX on a 404 with a special error page, $captured will actually ### hold the contents of that page, and make it *appear* like the ### request was a success, when really it wasn't :( ### there doesn't seem to be an option for lynx to change the exit ### code based on a 4XX status or so. 
### the closest we can come is using --error_file and parsing that, ### which is very unreliable ;( $local->print( $captured ); $local->close or return; return $to; } ### use /bin/ncftp to fetch files sub _ncftp_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### we can only set passive mode in interactive sessions, so bail out ### if $FTP_PASSIVE is set return if $FTP_PASSIVE; ### see if we have a ncftp binary ### my $ncftp; unless( $ncftp = can_run('ncftp') ) { $METHOD_FAIL->{'ncftp'} = 1; return; } my $cmd = [ $ncftp, '-V', # do not be verbose '-p', $FROM_EMAIL, # email as password $self->host, # hostname dirname($to), # local dir for the file # remote path to the file ### DO NOT quote things for IPC::Run, it breaks stuff. $IPC::Cmd::USE_IPC_RUN ? File::Spec::Unix->catdir( $self->path, $self->file ) : QUOTE. File::Spec::Unix->catdir( $self->path, $self->file ) .QUOTE ]; ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command failed: %1", $captured || '')); } return $to; } ### use /bin/curl to fetch files sub _curl_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; my $curl; unless ( $curl = can_run('curl') ) { $METHOD_FAIL->{'curl'} = 1; return; } ### these long opts are self explanatory - I like that -jmb my $cmd = [ $curl, '-q' ]; push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; push(@$cmd, '--silent') unless $DEBUG; ### curl does the right thing with passive, regardless ### if ($self->scheme eq 'ftp') { push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); } ### curl doesn't follow 302 (temporarily moved) etc automatically ### so we add --location to enable that. 
push @$cmd, '--fail', '--location', '--output', $to, $self->uri; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command failed: %1", $captured || '')); } return $to; } ### /usr/bin/fetch fetch! ### sub _fetch_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### see if we have a fetch binary ### my $fetch; unless( HAS_FETCH and $fetch = can_run('fetch') ) { $METHOD_FAIL->{'fetch'} = 1; return; } ### no verboseness, thanks ### my $cmd = [ $fetch, '-q' ]; ### if a timeout is set, add it ### push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT; ### run passive if specified ### #push @$cmd, '-p' if $FTP_PASSIVE; local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE; ### set the output document, add the uri ### push @$cmd, '-o', $to, $self->uri; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); ### shell out ### my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG )) { ### wget creates the output document always, even if the fetch ### fails.. 
so unlink it in that case 1 while unlink $to; return $self->_error(loc( "Command failed: %1", $captured || '' )); } return $to; } ### use File::Copy for fetching file:// urls ### ### ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html) ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) ### sub _file_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; ### prefix a / on unix systems with a file uri, since it would ### look somewhat like this: ### file:///home/kane/file ### whereas windows file uris for 'c:\some\dir\file' might look like: ### file:///C:/some/dir/file ### file:///C|/some/dir/file ### or for a network share '\\host\share\some\dir\file': ### file:////host/share/some/dir/file ### ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: ### file://vms.host.edu/disk$user/my/notes/note12345.txt ### my $path = $self->path; my $vol = $self->vol; my $share = $self->share; my $remote; if (!$share and $self->host) { return $self->_error(loc( "Currently %1 cannot handle hosts in %2 urls", 'File::Fetch', 'file://' )); } if( $vol ) { $path = File::Spec->catdir( split /\//, $path ); $remote = File::Spec->catpath( $vol, $path, $self->file); } elsif( $share ) { ### win32 specific, and a share name, so we wont bother with File::Spec $path =~ s|/+|\\|g; $remote = "\\\\".$self->host."\\$share\\$path"; } else { ### File::Spec on VMS can not currently handle UNIX syntax. my $file_class = ON_VMS ? 
'File::Spec::Unix' : 'File::Spec'; $remote = $file_class->catfile( $path, $self->file ); } ### File::Copy is littered with 'die' statements :( ### my $rv = eval { File::Copy::copy( $remote, $to ) }; ### something went wrong ### if( !$rv or $@ ) { return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", $remote, $to, $!, $@)); } return $to; } ### use /usr/bin/rsync to fetch files sub _rsync_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; my $rsync; unless ( $rsync = can_run('rsync') ) { $METHOD_FAIL->{'rsync'} = 1; return; } my $cmd = [ $rsync ]; ### XXX: rsync has no I/O timeouts at all, by default push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; push(@$cmd, '--quiet') unless $DEBUG; ### DO NOT quote things for IPC::Run, it breaks stuff. push @$cmd, $self->uri, $to; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? ($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command %1 failed: %2", "@$cmd" || '', $captured || '')); } return $to; } ### use git to fetch files sub _git_fetch { my $self = shift; my %hash = @_; my ($to); my $tmpl = { to => { required => 1, store => \$to } }; check( $tmpl, \%hash ) or return; my $git; unless ( $git = can_run('git') ) { $METHOD_FAIL->{'git'} = 1; return; } my $cmd = [ $git, 'clone' ]; #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; push(@$cmd, '--quiet') unless $DEBUG; ### DO NOT quote things for IPC::Run, it breaks stuff. push @$cmd, $self->uri, $to; ### with IPC::Cmd > 0.41, this is fixed in teh library, ### and there's no need for special casing any more. ### DO NOT quote things for IPC::Run, it breaks stuff. # $IPC::Cmd::USE_IPC_RUN # ? 
($to, $self->uri) # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); my $captured; unless(run( command => $cmd, buffer => \$captured, verbose => $DEBUG ) ) { return $self->_error(loc("Command %1 failed: %2", "@$cmd" || '', $captured || '')); } return $to; } ################################# # # Error code # ################################# =pod =head2 $ff->error([BOOL]) Returns the last encountered error as string. Pass it a true value to get the C output instead. =cut ### error handling the way Archive::Extract does it sub _error { my $self = shift; my $error = shift; $self->_error_msg( $error ); $self->_error_msg_long( Carp::longmess($error) ); if( $WARN ) { carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; } return; } sub error { my $self = shift; return shift() ? $self->_error_msg_long : $self->_error_msg; } 1; =pod =head1 HOW IT WORKS File::Fetch is able to fetch a variety of uris, by using several external programs and modules. Below is a mapping of what utilities will be used in what order for what schemes, if available: file => LWP, lftp, file http => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp rsync => rsync git => git If you'd like to disable the use of one or more of these utilities and/or modules, see the C<$BLACKLIST> variable further down. If a utility or module isn't available, it will be marked in a cache (see the C<$METHOD_FAIL> variable further down), so it will not be tried again. The C method will only fail when all options are exhausted, and it was not able to retrieve the file. The C utility is available on FreeBSD. NetBSD and Dragonfly BSD may also have it from C. We only check for C on those three platforms. C is a very limited L based mechanism for retrieving C schemed urls. It doesn't follow redirects for instance. C only supports C style urls. A special note about fetching files from an ftp uri: By default, all ftp connections are done in passive mode. 
To change that, see the C<$FTP_PASSIVE> variable further down.

Furthermore, ftp uris only support anonymous connections, so no
named user/password pair can be passed along.

C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
further down.

=head1 GLOBAL VARIABLES

The behaviour of File::Fetch can be altered by changing the following
global variables:

=head2 $File::Fetch::FROM_EMAIL

This is the email address that will be sent as your anonymous ftp
password.

Default is C<File-Fetch@example.com>.

=head2 $File::Fetch::USER_AGENT

This is the useragent as C<LWP> will report it.

Default is C<File::Fetch/$VERSION>.

=head2 $File::Fetch::FTP_PASSIVE

This variable controls whether the environment variable C<FTP_PASSIVE>
and any passive switches to commandline tools will be set to true.

Default value is 1.

Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
files, since passive mode can only be set interactively for this binary.

=head2 $File::Fetch::TIMEOUT

When set, controls the network timeout (counted in seconds).

Default value is 0.

=head2 $File::Fetch::WARN

This variable controls whether errors encountered internally by
C<File::Fetch> should be C<carp>'d or not.

Set to false to silence warnings. Inspect the output of the C<error()>
method manually to see what went wrong.

Defaults to C<true>.

=head2 $File::Fetch::DEBUG

This enables debugging output when calling commandline utilities to
fetch files.
This also enables C<Carp::longmess> errors, instead of the regular
C<carp> errors.

Good for tracking down why things don't work with your particular
setup.

Default is 0.

=head2 $File::Fetch::BLACKLIST

This is an array ref holding blacklisted modules/utilities for fetching
files with.

To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
set $File::Fetch::BLACKLIST to:

    $File::Fetch::BLACKLIST = [qw|lwp netftp|]

The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.

See the note on C<MAPPING> below.

=head2 $File::Fetch::METHOD_FAIL

This is a hashref registering what modules/utilities were known to fail
for fetching files (mostly because they weren't installed).
You can reset this cache by assigning an empty hashref to it, or
individually remove keys.

See the note on C<MAPPING> below.

=head1 MAPPING

Here's a quick mapping for the utilities/modules, and their names for
the $BLACKLIST, $METHOD_FAIL and other internal functions.

    LWP         => lwp
    HTTP::Lite  => httplite
    HTTP::Tiny  => httptiny
    Net::FTP    => netftp
    wget        => wget
    lynx        => lynx
    ncftp       => ncftp
    ftp         => ftp
    curl        => curl
    rsync       => rsync
    lftp        => lftp
    fetch       => fetch
    IO::Socket  => iosock

=head1 FREQUENTLY ASKED QUESTIONS

=head2 So how do I use a proxy with File::Fetch?

C<File::Fetch> currently only supports proxies with LWP::UserAgent.
You will need to set your environment variables accordingly. For
example, to use an ftp proxy:

    $ENV{ftp_proxy} = 'foo.com';

Refer to the LWP::UserAgent manpage for more details.

=head2 I used 'lynx' to fetch a file, but its contents is all wrong!

C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
which we in turn capture. If that content is a 'custom' error file
(like, say, a C<404 handler>), you will get that content instead.

Sadly, C<lynx> doesn't support any options to return a different exit
code on non-C<200 OK> status, giving us no way to tell the difference
between a 'successful' fetch and a custom error page.

Therefore, we recommend to only use C<lynx> as a last resort. This is
why it is at the back of our list of methods to try as well.

=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?

C<File::Fetch> is relatively smart about things. When trying to write
a file to disk, it removes the C<query parameters> (see the
C<output_file> method for details) from the file name before creating
it. In most cases this suffices.

If you have any other characters you need to escape, please install
the C<URI::Escape> module from CPAN, and pre-encode your URI before
passing it to C<File::Fetch>.
You can read about the details of URIs and URI encoding here:

  http://www.faqs.org/rfcs/rfc2396.html

=head1 TODO

=over 4

=item Implement $PREFER_BIN

To indicate to rather use commandline tools than modules

=back

=head1 BUG REPORTS

Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.orgE<gt>.

=head1 AUTHOR

This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.

=head1 COPYRIGHT

This library is free software; you may redistribute and/or modify it
under the same terms as Perl itself.

=cut

# Local variables:
# c-indentation-style: bsd
# c-basic-offset: 4
# indent-tabs-mode: nil
# End:
# vim: expandtab shiftwidth=4:

# tar member header: Listing.pm000064400000024606147634421560006537 0ustar00
package File::Listing;

sub Version { $VERSION; }
$VERSION = "6.04";

require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(parse_dir);

use strict;

use Carp ();
use HTTP::Date qw(str2time);

# parse_dir($listing [, $tz [, $fstype [, $error]]])
#
# Dispatches to the parse() method of File::Listing::<fstype> (default
# 'unix').  $tz and $error are only forwarded when the caller actually
# supplied them.
sub parse_dir ($;$$$)
{
    my($dir, $tz, $fstype, $error) = @_;

    $fstype ||= 'unix';
    $fstype = "File::Listing::" . lc $fstype;

    my @args = $_[0];
    push(@args, $tz) if(@_ >= 2);
    push(@args, $error) if(@_ >= 4);

    $fstype->parse(@args);
}

# Subclasses must override line(); the base class cannot parse anything.
sub line { Carp::croak("Not implemented yet"); }

sub init { } # Dummy sub

# file_mode($perm_string)
#
# Converts a 10 character `ls -l' style string ("drwxr-xr-x") into the
# numeric mode, including the file type bits.  Croaks on a string of the
# wrong length or an unknown type character.
sub file_mode ($)
{
    Carp::croak("Input to file_mode() must be a 10 character string.")
        unless length($_[0]) == 10;

    # This routine was originally borrowed from Graham Barr's
    # Net::FTP package.

    local $_ = shift;
    my $mode = 0;
    my($type);

    s/^(.)// and $type = $1;

    # When the set-group-ID bit (file mode bit 02000) is set, and the group
    # execution bit (file mode bit 00020) is unset, and it is a regular file,
    # some implementations of `ls' use the letter `S', others use `l' or `L'.
    # Convert this `S'.

    s/[Ll](...)$/S$1/;

    # One bit per remaining character; '-', 'S' and 'T' mean "not set".
    while (/(.)/g) {
        $mode <<= 1;
        $mode |= 1 if $1 ne "-" &&
                      $1 ne 'S' &&
                      $1 ne 'T';
    }

    $mode |= 0004000 if /^..s....../i;
    $mode |= 0002000 if /^.....s.../i;
    $mode |= 0001000 if /^........t/i;

    # De facto standard definitions. From 'stat.h' on Solaris 9.

    $type eq "p" and $mode |= 0010000 or        # fifo
    $type eq "c" and $mode |= 0020000 or        # character special
    $type eq "d" and $mode |= 0040000 or        # directory
    $type eq "b" and $mode |= 0060000 or        # block special
    $type eq "-" and $mode |= 0100000 or        # regular
    $type eq "l" and $mode |= 0120000 or        # symbolic link
    $type eq "s" and $mode |= 0140000 or        # socket
    $type eq "D" and $mode |= 0150000 or        # door
        Carp::croak("Unknown file type: $type");

    $mode;
}

# parse($pkg, $dir, $tz, $error)
#
# Feeds every line of the listing to $pkg->line() and collects the results.
# Returns the list of entries, or a reference to it in scalar context.
sub parse
{
    my($pkg, $dir, $tz, $error) = @_;

    # First let's try to determine what kind of dir parameter we have
    # received.  We allow both listings, reference to arrays and
    # file handles to read from.

    if (ref($dir) eq 'ARRAY') {
        # Already splitted up
    }
    elsif (ref($dir) eq 'GLOB') {
        # A file handle
    }
    elsif (ref($dir)) {
        Carp::croak("Illegal argument to parse_dir()");
    }
    elsif ($dir =~ /^\*\w+(::\w+)+$/) {
        # This scalar looks like a file handle, so we assume it is
    }
    else {
        # A normal scalar listing
        $dir = [ split(/\n/, $dir) ];
    }

    $pkg->init();

    my @files = ();
    if (ref($dir) eq 'ARRAY') {
        for (@$dir) {
            push(@files, $pkg->line($_, $tz, $error));
        }
    }
    else {
        local($_);
        while (<$dir>) {
            chomp;
            push(@files, $pkg->line($_, $tz, $error));
        }
    }
    wantarray ? @files : \@files;
}


package File::Listing::unix;

use HTTP::Date qw(str2time);

# A place to remember current directory from last line parsed.
use vars qw($curdir @ISA);

@ISA = qw(File::Listing);

sub init
{
    $curdir = '';
}

# Parses one line of `ls -l' / `ls -lR' output.  Returns an entry
# [name, type, size, mtime, mode] or an empty list for lines that carry
# no entry (directory headers, totals, blank lines, error messages).
sub line
{
    shift; # package name
    local($_) = shift;
    my($tz, $error) = @_;

    s/\015//g;
    #study;

    my ($kind, $size, $date, $name);
    if (($kind, $size, $date, $name) =
        /^([\-FlrwxsStTdD]{10})                   # Type and permission bits
         .*                                       # Anything in between (skipped)
         \D(\d+)                                  # File size
         \s+                                      # Some space
         (\w{3}\s+\d+\s+(?:\d{1,2}:\d{2}|\d{4})|\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2})  # Date
         \s+                                      # Some more space
         (.*)$                                    # File name
        /x )

    {
        return if $name eq '.' || $name eq '..';
        $name = "$curdir/$name" if length $curdir;
        my $type = '?';
        if ($kind =~ /^l/ && $name =~ /(.*) -> (.*)/ ) {
            $name = $1;
            $type = "l $2";
        }
        elsif ($kind =~ /^[\-F]/) { # (hopefully) a regular file
            $type = 'f';
        }
        elsif ($kind =~ /^[dD]/) {
            $type = 'd';
            $size = undef;  # Don't believe the reported size
        }
        return [$name, $type, $size, str2time($date, $tz),
                File::Listing::file_mode($kind)];

    }
    elsif (/^(.+):$/ && !/^[dcbsp].*\s.*\s.*:$/ ) {
        # "dir:" header of a recursive listing
        my $dir = $1;
        return () if $dir eq '.';
        $curdir = $dir;
        return ();
    }
    elsif (/^[Tt]otal\s+(\d+)$/ || /^\s*$/) {
        return ();
    }
    elsif (/not found/    || # OSF1, HPUX, and SunOS return
                             # "$file not found"
           /No such file/ || # IRIX returns
                             # "UX:ls: ERROR: Cannot access $file: No such file or directory"
                             # Solaris returns
                             # "$file: No such file or directory"
           /cannot find/     # Windows NT returns
                             # "The system cannot find the path specified."
           ) {
        return () unless defined $error;
        $error->($_) if ref($error) eq 'CODE';
        warn "Error: $_\n" if $error eq 'warn';
        return ();
    }
    elsif ($_ eq '') {       # AIX, and Linux return nothing
        return () unless defined $error;
        $error->("No such file or directory") if ref($error) eq 'CODE';
        warn "Warning: No such file or directory\n" if $error eq 'warn';
        return ();
    }
    else {
        # parse failed, check if the dosftp parse understands it
        File::Listing::dosftp->init();
        return(File::Listing::dosftp->line($_,$tz,$error));
    }

}


package File::Listing::dosftp;

use HTTP::Date qw(str2time);

# A place to remember current directory from last line parsed.
use vars qw($curdir @ISA);

@ISA = qw(File::Listing);

sub init
{
    $curdir = '';
}

# Parses one line of a DOS/Windows style FTP listing.
sub line
{
    shift; # package name
    local($_) = shift;
    my($tz, $error) = @_;

    s/\015//g;

    my ($date, $size_or_dir, $name, $size);

    # 02-05-96  10:48AM                 1415 src.slf
    # 09-10-96  09:18AM       <DIR>          sl_util
    if (($date, $size_or_dir, $name) =
        /^(\d\d-\d\d-\d\d\s+\d\d:\d\d\wM)         # Date and time info
         \s+                                      # Some space
         (<\w{3}>|\d+)                            # Dir or Size
         \s+                                      # Some more space
         (.+)$                                    # File name
        /x )
    {
        return if $name eq '.' || $name eq '..';
        $name = "$curdir/$name" if length $curdir;
        my $type = '?';
        # The second column is either a byte count or the <DIR> marker.
        if ($size_or_dir eq '<DIR>') {
            $type = "d";
            $size = ""; # directories have no size in the pc listing
        }
        else {
            $type = 'f';
            $size = $size_or_dir;
        }
        return [$name, $type, $size, str2time($date, $tz), undef];
    }
    else {
        return () unless defined $error;
        $error->($_) if ref($error) eq 'CODE';
        warn "Can't parse: $_\n" if $error eq 'warn';
        return ();
    }

}


package File::Listing::vms;
@File::Listing::vms::ISA = qw(File::Listing);


package File::Listing::netware;
@File::Listing::netware::ISA = qw(File::Listing);


package File::Listing::apache;

use vars qw(@ISA);

@ISA = qw(File::Listing);

sub init { }

# Parses one line of an Apache generated HTML directory index.
sub line {
    shift; # package name
    local($_) = shift;
    my($tz, $error) = @_; # ignored for now...

    s!</?t[rd][^>]*>! !g;  # clean away various table stuff

    # $1 = href target, $2-$4 = date parts, $5:$6 = time, $7 = size
    if (m!<A\s+HREF=\"([^\"]+)\">.*</A>.*?(\d+)-([a-zA-Z]+|\d+)-(\d+)\s+(\d+):(\d+)\s+(?:([\d\.]+[kMG]?|-))!i) {
        my($filename, $filesize) = ($1, $7);
        my($d,$m,$y, $H,$M) = ($2,$3,$4,$5,$6);
        if ($m =~ /^\d+$/) {
            ($d,$y) = ($y,$d) # iso date
        }
        else {
            $m = _monthabbrev_number($m);
        }

        $filesize = 0 if $filesize eq '-';
        if ($filesize =~ s/k$//i) {
            $filesize *= 1024;
        }
        elsif ($filesize =~ s/M$//) {
            $filesize *= 1024*1024;
        }
        elsif ($filesize =~ s/G$//) {
            $filesize *= 1024*1024*1024;
        }
        $filesize = int $filesize;

        require Time::Local;
        # _guess_year() yields a full four-digit year, which timelocal()
        # takes literally; no 1900 offset arithmetic is needed.
        my $filetime = Time::Local::timelocal(0,$M,$H,$d,$m-1,_guess_year($y));
        my $filetype = ($filename =~ s|/$|| ? "d" : "f");
        return [$filename, $filetype, $filesize, $filetime, undef];
    }

    return ();
}

# Expands a year as found in a listing to a four-digit year:
# four-digit input is returned unchanged, 90..999 is taken as an offset
# from 1900, anything below 90 as an offset from 2000.
sub _guess_year {
    my $y = shift;
    return $y        if $y >= 1000;
    return 1900 + $y if $y >= 90;
    return 2000 + $y;
}

# Maps an English three-letter month abbreviation to 1..12 (undef if unknown).
sub _monthabbrev_number {
    my $mon = shift;
    +{'Jan' => 1,
      'Feb' => 2,
      'Mar' => 3,
      'Apr' => 4,
      'May' => 5,
      'Jun' => 6,
      'Jul' => 7,
      'Aug' => 8,
      'Sep' => 9,
      'Oct' => 10,
      'Nov' => 11,
      'Dec' => 12,
     }->{$mon};
}

1;

__END__

=head1 NAME

File::Listing - parse directory listing

=head1 SYNOPSIS

 use File::Listing qw(parse_dir);
 $ENV{LANG} = "C";  # dates in non-English locales not supported
 for (parse_dir(`ls -l`)) {
     ($name, $type, $size, $mtime, $mode) = @$_;
     next if $type ne 'f'; # plain file
     #...
 }

 # directory listing can also be read from a file
 open(LISTING, "zcat ls-lR.gz|");
 $dir = parse_dir(\*LISTING, '+0000');

=head1 DESCRIPTION

This module exports a single function called parse_dir(), which can be
used to parse directory listings.

The first parameter to parse_dir() is the directory listing to parse.
It can be a scalar, a reference to an array of directory lines or a
glob representing a filehandle to read the directory listing from.

The second parameter is the time zone to use when parsing time stamps
in the listing. If this value is undefined, then the local time zone is
assumed.

The third parameter is the type of listing to assume.  Currently
supported formats are 'unix', 'apache' and 'dosftp'. The default
value is 'unix'.  Ideally, the listing type should be determined
automatically.

The fourth parameter specifies how unparseable lines should be treated.
Values can be 'ignore', 'warn' or a code reference.  Warn means that
the perl warn() function will be called.  If a code reference is
passed, then this routine will be called and the return value from it
will be incorporated in the listing.  The default is 'ignore'.

Only the first parameter is mandatory.

The return value from parse_dir() is a list of directory entries.  In
a scalar context the return value is a reference to the list.
The directory entries are represented by an array consisting of [
$filename, $filetype, $filesize, $filetime, $filemode ].  The
$filetype value is one of the letters 'f', 'd', 'l' or '?'.  The
$filetime value is the seconds since Jan 1, 1970.  The
$filemode is a bitmask like the mode returned by stat().

=head1 COPYRIGHT

Copyright 1996-2010, Gisle Aas

Based on lsparse.pl (from Lee McLoughlin's ftp mirror package) and
Net::FTP's parse_dir (Graham Barr).

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

Path.pm000064400000100643147634421560006016 0ustar00package File::Path;

use 5.005_04;
use strict;

use Cwd 'getcwd';
use File::Basename ();
use File::Spec     ();

BEGIN {
    if ($] < 5.006) {
        # can't say 'opendir my $dh, $dirname'
        # need to initialise $dh
        eval "use Symbol";
    }
}

use Exporter ();
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
$VERSION   = '2.09';
@ISA       = qw(Exporter);
@EXPORT    = qw(mkpath rmtree);
@EXPORT_OK = qw(make_path remove_tree);

my $Is_VMS   = $^O eq 'VMS';
my $Is_MacOS = $^O eq 'MacOS';

# These OSes complain if you want to remove a file that you have no
# write permission to:
my $Force_Writeable = grep {$^O eq $_} qw(amigaos dos epoc MSWin32 MacOS os2);

# Unix-like systems need to stat each directory in order to detect
# race condition. MS-Windows is immune to this particular attack.
my $Need_Stat_Check = !($^O eq 'MSWin32');

# Load Carp on demand and tail-call carp() so the warning is reported
# from the caller's point of view.
sub _carp {
    require Carp;
    goto &Carp::carp;
}

# Load Carp on demand and tail-call croak().
sub _croak {
    require Carp;
    goto &Carp::croak;
}

# Report a non-fatal problem.
#
#   $arg     - option hash; when it has an 'error' entry (a reference to a
#              scalar holding an array ref) the diagnostic is appended there
#              as { $object => $message }, otherwise it is carp()ed.
#   $message - description of the problem; ": $!" is appended when $! is set.
#   $object  - optional name of the file/directory concerned ('' if omitted).
sub _error {
    my $arg     = shift;
    my $message = shift;
    my $object  = shift;

    if ($arg->{error}) {
        $object = '' unless defined $object;
        $message .= ": $!" if $!;
        push @{${$arg->{error}}}, {$object => $message};
    }
    else {
        _carp(defined($object) ? "$message for $object: $!" : "$message: $!");
    }
}

# Modern interface to mkpath(): guarantees that the last argument is an
# option hash (appending an empty one if needed) and then jumps to mkpath().
sub make_path {
    push @_, {} unless @_ and UNIVERSAL::isa($_[-1],'HASH');
    goto &mkpath;
}

# Create the given directories, including missing parents.
#
# Called either as mkpath($paths, $verbose, $mode) (old style, $paths being
# a string or an array ref) or as mkpath(@paths, \%opts).  Recognised
# options: mode (alias mask), verbose, error, owner (aliases user, uid) and
# group.  Symbolic owner/group names are mapped with getpwnam/getgrnam; when
# that fails a diagnostic is issued and the option is dropped.
#
# Returns the list of directories actually created.
sub mkpath {
    my $old_style = !(@_ and UNIVERSAL::isa($_[-1],'HASH'));

    my $arg;
    my $paths;

    if ($old_style) {
        my ($verbose, $mode);
        ($paths, $verbose, $mode) = @_;
        $paths = [$paths] unless UNIVERSAL::isa($paths,'ARRAY');
        $arg->{verbose} = $verbose;
        $arg->{mode}    = defined $mode ? $mode : 0777;
    }
    else {
        $arg = pop @_;
        $arg->{mode}      = delete $arg->{mask} if exists $arg->{mask};
        $arg->{mode}      = 0777 unless exists $arg->{mode};
        ${$arg->{error}}  = [] if exists $arg->{error};
        $arg->{owner}     = delete $arg->{user} if exists $arg->{user};
        $arg->{owner}     = delete $arg->{uid}  if exists $arg->{uid};
        if (exists $arg->{owner} and $arg->{owner} =~ /\D/) {
            my $uid = (getpwnam $arg->{owner})[2];
            if (defined $uid) {
                $arg->{owner} = $uid;
            }
            else {
                _error($arg, "unable to map $arg->{owner} to a uid, ownership not changed");
                delete $arg->{owner};
            }
        }
        if (exists $arg->{group} and $arg->{group} =~ /\D/) {
            my $gid = (getgrnam $arg->{group})[2];
            if (defined $gid) {
                $arg->{group} = $gid;
            }
            else {
                _error($arg, "unable to map $arg->{group} to a gid, group ownership not changed");
                delete $arg->{group};
            }
        }
        if (exists $arg->{owner} and not exists $arg->{group}) {
            $arg->{group} = -1; # chown will leave group unchanged
        }
        if (exists $arg->{group} and not exists $arg->{owner}) {
            $arg->{owner} = -1; # chown will leave owner unchanged
        }
        $paths = [@_];
    }
    return _mkpath($arg, $paths);
}

# Worker for mkpath(): creates each path in @$paths, recursing to create a
# missing parent first.  A failed mkdir is only an error if the directory
# still does not exist afterwards; it is then stored in the 'error' list
# when one was requested, and is fatal (croak) otherwise.
#
# Returns the list of directories created.
sub _mkpath {
    my $arg   = shift;
    my $paths = shift;

    my(@created,$path);
    foreach $path (@$paths) {
        next unless defined($path) and length($path);
        $path .= '/' if $^O eq 'os2' and $path =~ /^\w:\z/s; # feature of CRT
        # Logic wants Unix paths, so go with the flow.
        if ($Is_VMS) {
            next if $path eq '/';
            $path = VMS::Filespec::unixify($path);
        }
        next if -d $path;
        my $parent = File::Basename::dirname($path);
        unless (-d $parent or $path eq $parent) {
            push(@created,_mkpath($arg, [$parent]));
        }
        print "mkdir $path\n" if $arg->{verbose};
        if (mkdir($path,$arg->{mode})) {
            push(@created, $path);
            if (exists $arg->{owner}) {
                # NB: $arg->{group} guaranteed to be set during initialisation
                if (!chown $arg->{owner}, $arg->{group}, $path) {
                    _error($arg, "Cannot change ownership of $path to $arg->{owner}:$arg->{group}");
                }
            }
        }
        else {
            my $save_bang = $!;
            my ($e, $e1) = ($save_bang, $^E);
            $e .= "; $e1" if $e ne $e1;
            # allow for another process to have created it meanwhile
            if (!-d $path) {
                $! = $save_bang;
                if ($arg->{error}) {
                    push @{${$arg->{error}}}, {$path => $e};
                }
                else {
                    _croak("mkdir $path: $e");
                }
            }
        }
    }
    return @created;
}

# Modern interface to rmtree(): guarantees that the last argument is an
# option hash (appending an empty one if needed) and then jumps to rmtree().
sub remove_tree {
    push @_, {} unless @_ and UNIVERSAL::isa($_[-1],'HASH');
    goto &rmtree;
}

# True when $test is $dir itself or lies beneath it, comparing volume and
# directory components as split by File::Spec.
sub _is_subdir {
    my($dir, $test) = @_;

    my($dv, $dd) = File::Spec->splitpath($dir, 1);
    my($tv, $td) = File::Spec->splitpath($test, 1);

    # not on same volume
    return 0 if $dv ne $tv;

    my @d = File::Spec->splitdir($dd);
    my @t = File::Spec->splitdir($td);

    # @t can't be a subdir if it's shorter than @d
    return 0 if @t < @d;

    return join('/', @d) eq join('/', splice @t, 0, +@d);
}

# Remove the given files and directory trees.
#
# Called either as rmtree($paths, $verbose, $safe) (old style, $paths being
# a string or an array ref) or as rmtree(@paths, \%opts).  Recognised
# options: verbose, safe, keep_root, result and error.  A path that
# contains the current working directory is refused with a diagnostic.
#
# Returns the number of filesystem objects removed; 0 when the initial
# working directory cannot be determined or lstat()ed, or when the
# old-style call is given no path.
sub rmtree {
    my $old_style = !(@_ and UNIVERSAL::isa($_[-1],'HASH'));

    my $arg;
    my $paths;

    if ($old_style) {
        my ($verbose, $safe);
        ($paths, $verbose, $safe) = @_;
        $arg->{verbose} = $verbose;
        $arg->{safe}    = defined $safe    ? $safe    : 0;

        if (defined($paths) and length($paths)) {
            $paths = [$paths] unless UNIVERSAL::isa($paths,'ARRAY');
        }
        else {
            _carp ("No root path(s) specified\n");
            return 0;
        }
    }
    else {
        $arg = pop @_;
        ${$arg->{error}}  = [] if exists $arg->{error};
        ${$arg->{result}} = [] if exists $arg->{result};
        $paths = [@_];
    }

    $arg->{prefix} = '';
    $arg->{depth}  = 0;

    my @clean_path;
    $arg->{cwd} = getcwd() or do {
        _error($arg, "cannot fetch initial working directory");
        return 0;
    };
    # /s lets the capture span a directory name that contains a newline;
    # without it the match fails and $1 is stale or undefined.
    for ($arg->{cwd}) { /\A(.*)\Z/s; $_ = $1 } # untaint

    for my $p (@$paths) {
        # need to fixup case and map \ to / on Windows
        my $ortho_root = $^O eq 'MSWin32' ? _slash_lc($p)          : $p;
        my $ortho_cwd  = $^O eq 'MSWin32' ? _slash_lc($arg->{cwd}) : $arg->{cwd};
        my $ortho_root_length = length($ortho_root);
        $ortho_root_length-- if $^O eq 'VMS'; # don't compare '.' with ']'
        if ($ortho_root_length && _is_subdir($ortho_root, $ortho_cwd)) {
            local $! = 0;
            _error($arg, "cannot remove path when cwd is $arg->{cwd}", $p);
            next;
        }

        if ($Is_MacOS) {
            $p  = ":$p" unless $p =~ /:/;
            $p .= ":"   unless $p =~ /:\z/;
        }
        elsif ($^O eq 'MSWin32') {
            $p =~ s{[/\\]\z}{};
        }
        else {
            $p =~ s{/\z}{};
        }
        push @clean_path, $p;
    }

    # Only device and inode are taken from lstat (fields 0 and 1); the
    # slice used to name a third key, 'perm', which was always left undef.
    @{$arg}{qw(device inode)} = (lstat $arg->{cwd})[0,1] or do {
        _error($arg, "cannot stat initial working directory", $arg->{cwd});
        return 0;
    };

    return _rmtree($arg, \@clean_path);
}

# Worker for rmtree(): removes every entry of @$paths relative to the
# current directory, chdir()ing into each directory, recursing on its
# contents and chdir()ing back to $arg->{cwd}.  Device/inode numbers are
# compared after every chdir (unless $Need_Stat_Check is false) and a
# mismatch is fatal.
#
# Returns the number of objects removed.
sub _rmtree {
    my $arg   = shift;
    my $paths = shift;

    my $count  = 0;
    my $curdir = File::Spec->curdir();
    my $updir  = File::Spec->updir();

    my (@files, $root);
    ROOT_DIR:
    foreach $root (@$paths) {
        # since we chdir into each directory, it may not be obvious
        # to figure out where we are if we generate a message about
        # a file name. We therefore construct a semi-canonical
        # filename, anchored from the directory being unlinked (as
        # opposed to being truly canonical, anchored from the root (/).

        my $canon = $arg->{prefix}
            ? File::Spec->catfile($arg->{prefix}, $root)
            : $root
        ;

        my ($ldev, $lino, $perm) = (lstat $root)[0,1,2] or next ROOT_DIR;

        if ( -d _ ) {
            $root = VMS::Filespec::vmspath(VMS::Filespec::pathify($root)) if $Is_VMS;

            if (!chdir($root)) {
                # see if we can escalate privileges to get in
                # (e.g. funny protection mask such as -w- instead of rwx)
                $perm &= 07777;
                my $nperm = $perm | 0700;
                if (!($arg->{safe} or $nperm == $perm or chmod($nperm, $root))) {
                    _error($arg, "cannot make child directory read-write-exec", $canon);
                    next ROOT_DIR;
                }
                elsif (!chdir($root)) {
                    _error($arg, "cannot chdir to child", $canon);
                    next ROOT_DIR;
                }
            }

            my ($cur_dev, $cur_inode, $perm) = (stat $curdir)[0,1,2] or do {
                _error($arg, "cannot stat current working directory", $canon);
                next ROOT_DIR;
            };

            if ($Need_Stat_Check) {
                ($ldev eq $cur_dev and $lino eq $cur_inode)
                    or _croak("directory $canon changed before chdir, expected dev=$ldev ino=$lino, actual dev=$cur_dev ino=$cur_inode, aborting.");
            }

            $perm &= 07777; # don't forget setuid, setgid, sticky bits
            my $nperm = $perm | 0700;

            # notabene: 0700 is for making readable in the first place,
            # it's also intended to change it to writable in case we have
            # to recurse in which case we are better than rm -rf for
            # subtrees with strange permissions

            if (!($arg->{safe} or $nperm == $perm or chmod($nperm, $curdir))) {
                _error($arg, "cannot make directory read+writeable", $canon);
                $nperm = $perm;
            }

            my $d;
            $d = gensym() if $] < 5.006;
            if (!opendir $d, $curdir) {
                _error($arg, "cannot opendir", $canon);
                @files = ();
            }
            else {
                no strict 'refs';
                if (!defined ${"\cTAINT"} or ${"\cTAINT"}) {
                    # Blindly untaint dir names if taint mode is
                    # active, or any perl < 5.006
                    @files = map { /\A(.*)\z/s; $1 } readdir $d;
                }
                else {
                    @files = readdir $d;
                }
                closedir $d;
            }

            if ($Is_VMS) {
                # Deleting large numbers of files from VMS Files-11
                # filesystems is faster if done in reverse ASCIIbetical order.
                # include '.' to '.;' from blead patch #31775
                @files = map {$_ eq '.' ? '.;' : $_} reverse @files;
            }

            @files = grep {$_ ne $updir and $_ ne $curdir} @files;

            if (@files) {
                # remove the contained files before the directory itself
                my $narg = {%$arg};
                @{$narg}{qw(device inode cwd prefix depth)}
                    = ($cur_dev, $cur_inode, $updir, $canon, $arg->{depth}+1);
                $count += _rmtree($narg, \@files);
            }

            # restore directory permissions of required now (in case the rmdir
            # below fails), while we are still in the directory and may do so
            # without a race via '.'
            if ($nperm != $perm and not chmod($perm, $curdir)) {
                _error($arg, "cannot reset chmod", $canon);
            }

            # don't leave the client code in an unexpected directory
            chdir($arg->{cwd})
                or _croak("cannot chdir to $arg->{cwd} from $canon: $!, aborting.");

            # ensure that a chdir upwards didn't take us somewhere other
            # than we expected (see CVE-2002-0435)
            ($cur_dev, $cur_inode) = (stat $curdir)[0,1]
                or _croak("cannot stat prior working directory $arg->{cwd}: $!, aborting.");

            if ($Need_Stat_Check) {
                ($arg->{device} eq $cur_dev and $arg->{inode} eq $cur_inode)
                    or _croak("previous directory $arg->{cwd} changed before entering $canon, expected dev=$ldev ino=$lino, actual dev=$cur_dev ino=$cur_inode, aborting.");
            }

            if ($arg->{depth} or !$arg->{keep_root}) {
                if ($arg->{safe} &&
                    ($Is_VMS ? !&VMS::Filespec::candelete($root) : !-w $root)) {
                    print "skipped $root\n" if $arg->{verbose};
                    next ROOT_DIR;
                }
                if ($Force_Writeable and !chmod $perm | 0700, $root) {
                    _error($arg, "cannot make directory writeable", $canon);
                }
                print "rmdir $root\n" if $arg->{verbose};
                if (rmdir $root) {
                    push @{${$arg->{result}}}, $root if $arg->{result};
                    ++$count;
                }
                else {
                    _error($arg, "cannot remove directory", $canon);
                    if ($Force_Writeable && !chmod($perm, ($Is_VMS ? VMS::Filespec::fileify($root) : $root))
                    ) {
                        _error($arg, sprintf("cannot restore permissions to 0%o",$perm), $canon);
                    }
                }
            }
        }
        else {
            # not a directory
            # A ']' or '>' that is not escaped by a preceding '^' means the
            # name is already a VMS file specification.
            $root = VMS::Filespec::vmsify("./$root")
                if $Is_VMS
                   && !File::Spec->file_name_is_absolute($root)
                   && ($root !~ m/(?<!\^)[\]>]+/);  # not already in VMS syntax

            if ($arg->{safe} &&
                ($Is_VMS ? !&VMS::Filespec::candelete($root)
                         : !(-l $root || -w $root)))
            {
                print "skipped $root\n" if $arg->{verbose};
                next ROOT_DIR;
            }

            my $nperm = $perm & 07777 | 0600;
            if ($Force_Writeable and $nperm != $perm and not chmod $nperm, $root) {
                _error($arg, "cannot make file writeable", $canon);
            }
            print "unlink $canon\n" if $arg->{verbose};
            # delete all versions under VMS
            for (;;) {
                if (unlink $root) {
                    push @{${$arg->{result}}}, $root if $arg->{result};
                }
                else {
                    _error($arg, "cannot unlink file", $canon);
                    # NB: 'and' binds tighter than 'or', so when
                    # $Force_Writeable is false the second diagnostic
                    # below is issued as well.
                    $Force_Writeable and chmod($perm, $root) or
                        _error($arg, sprintf("cannot restore permissions to 0%o",$perm), $canon);
                    last;
                }
                ++$count;
                last unless $Is_VMS && lstat $root;
            }
        }
    }
    return $count;
}

sub _slash_lc {
    # fix up slashes and case on MSWin32 so that we can determine that
    # c:\path\to\dir is underneath C:/Path/To
    my $path = shift;
    $path =~ tr{\\}{/};
    return lc($path);
}

1;

__END__

=head1 NAME

File::Path - Create or remove directory trees

=head1 VERSION

This document describes version 2.09 of File::Path, released
2013-01-17.
=head1 SYNOPSIS use File::Path qw(make_path remove_tree); make_path('foo/bar/baz', '/zug/zwang'); make_path('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711, }); remove_tree('foo/bar/baz', '/zug/zwang'); remove_tree('foo/bar/baz', '/zug/zwang', { verbose => 1, error => \my $err_list, }); # legacy (interface promoted before v2.00) mkpath('/foo/bar/baz'); mkpath('/foo/bar/baz', 1, 0711); mkpath(['/foo/bar/baz', 'blurfl/quux'], 1, 0711); rmtree('foo/bar/baz', 1, 1); rmtree(['foo/bar/baz', 'blurfl/quux'], 1, 1); # legacy (interface promoted before v2.06) mkpath('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 }); rmtree('foo/bar/baz', '/zug/zwang', { verbose => 1, mode => 0711 }); =head1 DESCRIPTION This module provide a convenient way to create directories of arbitrary depth and to delete an entire directory subtree from the filesystem. The following functions are provided: =over =item make_path( $dir1, $dir2, .... ) =item make_path( $dir1, $dir2, ...., \%opts ) The C function creates the given directories if they don't exists before, much like the Unix command C. The function accepts a list of directories to be created. Its behaviour may be tuned by an optional hashref appearing as the last parameter on the call. The function returns the list of directories actually created during the call; in scalar context the number of directories created. The following keys are recognised in the option hash: =over =item mode => $num The numeric permissions mode to apply to each created directory (defaults to 0777), to be modified by the current C. If the directory already exists (and thus does not need to be created), the permissions will not be modified. C is recognised as an alias for this parameter. =item verbose => $bool If present, will cause C to print the name of each directory as it is created. By default nothing is printed. =item error => \$err If present, it should be a reference to a scalar. 
This scalar will be made to reference an array, which will
be used to store any errors that are encountered.  See the L</"ERROR
HANDLING"> section for more information.

If this parameter is not used, certain error conditions may raise
a fatal error that will cause the program to halt, unless trapped
in an C<eval> block.

=item owner => $owner

=item user => $owner

=item uid => $owner

If present, will cause any created directory to be owned by C<$owner>.
If the value is numeric, it will be interpreted as a uid, otherwise
a username is assumed. An error will be issued if the username cannot be
mapped to a uid, or the uid does not exist, or the process lacks the
privileges to change ownership.

Ownership of directories that already exist will not be changed.

C<user> and C<uid> are aliases of C<owner>.

=item group => $group

If present, will cause any created directory to be owned by the group C<$group>.
If the value is numeric, it will be interpreted as a gid, otherwise
a group name is assumed. An error will be issued if the group name cannot be
mapped to a gid, or the gid does not exist, or the process lacks the
privileges to change group ownership.

Group ownership of directories that already exist will not be changed.

    make_path '/var/tmp/webcache', {owner=>'nobody', group=>'nogroup'};

=back

=item mkpath( $dir )

=item mkpath( $dir, $verbose, $mode )

=item mkpath( [$dir1, $dir2,...], $verbose, $mode )

=item mkpath( $dir1, $dir2,..., \%opt )

The mkpath() function provides the legacy interface of make_path() with
a different interpretation of the arguments passed.  The behaviour and
return value of the function are otherwise identical to make_path().

=item remove_tree( $dir1, $dir2, .... )

=item remove_tree( $dir1, $dir2, ...., \%opts )

The C<remove_tree> function deletes the given directories and any
files and subdirectories they might contain, much like the Unix
command C<rm -r> or C<del /s> on Windows.

The function accepts a list of directories to be
removed.
Its behaviour may be tuned by an optional hashref appearing as the last parameter on the call. The functions returns the number of files successfully deleted. The following keys are recognised in the option hash: =over =item verbose => $bool If present, will cause C to print the name of each file as it is unlinked. By default nothing is printed. =item safe => $bool When set to a true value, will cause C to skip the files for which the process lacks the required privileges needed to delete files, such as delete privileges on VMS. In other words, the code will make no attempt to alter file permissions. Thus, if the process is interrupted, no filesystem object will be left in a more permissive mode. =item keep_root => $bool When set to a true value, will cause all files and subdirectories to be removed, except the initially specified directories. This comes in handy when cleaning out an application's scratch directory. remove_tree( '/tmp', {keep_root => 1} ); =item result => \$res If present, it should be a reference to a scalar. This scalar will be made to reference an array, which will be used to store all files and directories unlinked during the call. If nothing is unlinked, the array will be empty. remove_tree( '/tmp', {result => \my $list} ); print "unlinked $_\n" for @$list; This is a useful alternative to the C key. =item error => \$err If present, it should be a reference to a scalar. This scalar will be made to reference an array, which will be used to store any errors that are encountered. See the L section for more information. Removing things is a much more dangerous proposition than creating things. As such, there are certain conditions that C may encounter that are so dangerous that the only sane action left is to kill the program. Use C to trap all that is reasonable (problems with permissions and the like), and let it die if things get out of hand. This is the safest course of action. 
=back

=item rmtree( $dir )

=item rmtree( $dir, $verbose, $safe )

=item rmtree( [$dir1, $dir2,...], $verbose, $safe )

=item rmtree( $dir1, $dir2,..., \%opt )

The rmtree() function provides the legacy interface of remove_tree()
with a different interpretation of the arguments passed. The behaviour
and return value of the function are otherwise identical to
remove_tree().

=back

=head2 ERROR HANDLING

=over 4

=item B<NOTE:>

The following error handling mechanism is considered
experimental and is subject to change pending feedback from
users.

=back

If C<make_path> or C<remove_tree> encounter an error, a diagnostic
message will be printed to C<STDERR> via C<carp> (for non-fatal
errors), or via C<croak> (for fatal errors).

If this behaviour is not desirable, the C<error> attribute may be
used to hold a reference to a variable, which will be used to store
the diagnostics. The variable is made a reference to an array of hash
references.  Each hash contains a single key/value pair where the key
is the name of the file, and the value is the error message (including
the contents of C<$!> when appropriate).  If a general error is
encountered the diagnostic key will be empty.

An example usage looks like:

  remove_tree( 'foo/bar', 'bar/rat', {error => \my $err} );
  if (@$err) {
      for my $diag (@$err) {
          my ($file, $message) = %$diag;
          if ($file eq '') {
              print "general error: $message\n";
          }
          else {
              print "problem unlinking $file: $message\n";
          }
      }
  }
  else {
      print "No error encountered\n";
  }

Note that if no errors are encountered, C<$err> will reference an
empty array.  This means that C<$err> will always end up TRUE; so you
need to test C<@$err> to determine if errors occurred.

=head2 NOTES

C<File::Path> blindly exports C<mkpath> and C<rmtree> into the
current namespace. These days, this is considered bad style, but
to change it now would break too much code. Nonetheless, you are
invited to specify what it is you are expecting to use:

  use File::Path 'rmtree';

The routines C<make_path> and C<remove_tree> are B<not> exported
by default. You must specify which ones you want to use.
  use File::Path 'remove_tree';

Note that a side-effect of the above is that C<mkpath> and C<rmtree>
are no longer exported at all. This is due to the way the C<Exporter>
module works. If you are migrating a codebase to use the new
interface, you will have to list everything explicitly. But that's
just good practice anyway.

  use File::Path qw(remove_tree rmtree);

=head3 API CHANGES

The API was changed in the 2.0 branch. For a time, C<mkpath> and
C<rmtree> tried, unsuccessfully, to deal with the two different
calling mechanisms. This approach was considered a failure.

The new semantics are now only available with C<make_path> and
C<remove_tree>. The old semantics are only available through
C<mkpath> and C<rmtree>. Users are strongly encouraged to upgrade
to at least 2.08 in order to avoid surprises.

=head3 SECURITY CONSIDERATIONS

There were race conditions in the 1.x implementations of File::Path's
C<rmtree> function (although sometimes patched depending on the OS
distribution or platform). The 2.0 version contains code to avoid the
problem mentioned in CVE-2002-0435.

See the following pages for more information:

  http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=286905
  http://www.nntp.perl.org/group/perl.perl5.porters/2005/01/msg97623.html
  http://www.debian.org/security/2005/dsa-696

Additionally, unless the C<safe> parameter is set (or the
third parameter in the traditional interface is TRUE), should a
C<remove_tree> be interrupted, files that were originally in read-only
mode may now have their permissions set to a read-write (or "delete
OK") mode.

=head1 DIAGNOSTICS

FATAL errors will cause the program to halt (C<croak>), since the
problem is so severe that it would be dangerous to continue. (This
can always be trapped with C<eval>, but it's not a good idea. Under
the circumstances, dying is the best thing to do).

SEVERE errors may be trapped using the modern interface. If they
are not trapped, or the old interface is used, such an error will
cause the program to halt.

All other errors may be trapped using the modern interface, otherwise
they will be C<carp>ed about.
Program execution will not be halted. =over 4 =item mkdir [path]: [errmsg] (SEVERE) C was unable to create the path. Probably some sort of permissions error at the point of departure, or insufficient resources (such as free inodes on Unix). =item No root path(s) specified C was not given any paths to create. This message is only emitted if the routine is called with the traditional interface. The modern interface will remain silent if given nothing to do. =item No such file or directory On Windows, if C gives you this warning, it may mean that you have exceeded your filesystem's maximum path length. =item cannot fetch initial working directory: [errmsg] C attempted to determine the initial directory by calling C, but the call failed for some reason. No attempt will be made to delete anything. =item cannot stat initial working directory: [errmsg] C attempted to stat the initial directory (after having successfully obtained its name via C), however, the call failed for some reason. No attempt will be made to delete anything. =item cannot chdir to [dir]: [errmsg] C attempted to set the working directory in order to begin deleting the objects therein, but was unsuccessful. This is usually a permissions issue. The routine will continue to delete other things, but this directory will be left intact. =item directory [dir] changed before chdir, expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL) C recorded the device and inode of a directory, and then moved into it. It then performed a C on the current directory and detected that the device and inode were no longer the same. As this is at the heart of the race condition problem, the program will die at this point. =item cannot make directory [dir] read+writeable: [errmsg] C attempted to change the permissions on the current directory to ensure that subsequent unlinkings would not run into problems, but was unable to do so. 
The permissions remain as they were, and the program will carry on, doing the best it can. =item cannot read [dir]: [errmsg] C tried to read the contents of the directory in order to acquire the names of the directory entries to be unlinked, but was unsuccessful. This is usually a permissions issue. The program will continue, but the files in this directory will remain after the call. =item cannot reset chmod [dir]: [errmsg] C, after having deleted everything in a directory, attempted to restore its permissions to the original state but failed. The directory may wind up being left behind. =item cannot remove [dir] when cwd is [dir] The current working directory of the program is F and you are attempting to remove an ancestor, such as F. The directory tree is left untouched. The solution is to C out of the child directory to a place outside the directory tree to be removed. =item cannot chdir to [parent-dir] from [child-dir]: [errmsg], aborting. (FATAL) C, after having deleted everything and restored the permissions of a directory, was unable to chdir back to the parent. The program halts to avoid a race condition from occurring. =item cannot stat prior working directory [dir]: [errmsg], aborting. (FATAL) C was unable to stat the parent directory after have returned from the child. Since there is no way of knowing if we returned to where we think we should be (by comparing device and inode) the only way out is to C. =item previous directory [parent-dir] changed before entering [child-dir], expected dev=[n] ino=[n], actual dev=[n] ino=[n], aborting. (FATAL) When C returned from deleting files in a child directory, a check revealed that the parent directory it returned to wasn't the one it started out from. This is considered a sign of malicious activity. 
=item cannot make directory [dir] writeable: [errmsg]

Just before removing a directory (after having successfully removed
everything it contained), C<remove_tree> attempted to set the permissions
on the directory to ensure it could be removed and failed. Program
execution continues, but the directory may possibly not be deleted.

=item cannot remove directory [dir]: [errmsg]

C<remove_tree> attempted to remove a directory, but failed. This may be
because some objects that were unable to be removed remain in the
directory, or because of a permissions issue. The directory will be left
behind.

=item cannot restore permissions of [dir] to [0nnn]: [errmsg]

After having failed to remove a directory, C<remove_tree> was unable to
restore its permissions from a permissive state back to a possibly
more restrictive setting. (Permissions given in octal).

=item cannot make file [file] writeable: [errmsg]

C<remove_tree> attempted to force the permissions of a file to ensure it
could be deleted, but failed to do so. It will, however, still attempt
to unlink the file.

=item cannot unlink file [file]: [errmsg]

C<remove_tree> failed to remove a file. Probably a permissions issue.

=item cannot restore permissions of [file] to [0nnn]: [errmsg]

After having failed to remove a file, C<remove_tree> was also unable
to restore the permissions on the file to a possibly less permissive
setting. (Permissions given in octal).

=item unable to map [owner] to a uid, ownership not changed

C<make_path> was instructed to give the ownership of created
directories to the symbolic name [owner], but C<getpwnam> did
not return the corresponding numeric uid. The directory will
be created, but ownership will not be changed.

=item unable to map [group] to a gid, group ownership not changed

C<make_path> was instructed to give the group ownership of created
directories to the symbolic name [group], but C<getgrnam> did
not return the corresponding numeric gid. The directory will
be created, but group ownership will not be changed.
=back =head1 SEE ALSO =over 4 =item * L Allows files and directories to be moved to the Trashcan/Recycle Bin (where they may later be restored if necessary) if the operating system supports such functionality. This feature may one day be made available directly in C. =item * L When removing directory trees, if you want to examine each file to decide whether to delete it (and possibly leaving large swathes alone), F offers a convenient and flexible approach to examining directory trees. =back =head1 BUGS Please report all bugs on the RT queue: L You can also send pull requests to the Github repository: L =head1 ACKNOWLEDGEMENTS Paul Szabo identified the race condition originally, and Brendan O'Dea wrote an implementation for Debian that addressed the problem. That code was used as a basis for the current code. Their efforts are greatly appreciated. Gisle Aas made a number of improvements to the documentation for 2.07 and his advice and assistance is also greatly appreciated. =head1 AUTHORS Tim Bunce and Charles Bailey. Currently maintained by David Landgren >. =head1 COPYRIGHT This module is copyright (C) Charles Bailey, Tim Bunce and David Landgren 1995-2013. All rights reserved. =head1 LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Which.pm000064400000015032147634421560006161 0ustar00package File::Which; use 5.004; use strict; use Exporter (); use File::Spec (); use vars qw{$VERSION @ISA @EXPORT @EXPORT_OK}; BEGIN { $VERSION = '1.09'; @ISA = 'Exporter'; @EXPORT = 'which'; @EXPORT_OK = 'where'; } use constant IS_VMS => ($^O eq 'VMS'); use constant IS_MAC => ($^O eq 'MacOS'); use constant IS_DOS => ($^O eq 'MSWin32' or $^O eq 'dos' or $^O eq 'os2'); # For Win32 systems, stores the extensions used for # executable files # For others, the empty string is used # because 'perl' . '' eq 'perl' => easier my @PATHEXT = (''); if ( IS_DOS ) { # WinNT. PATHEXT might be set on Cygwin, but not used. 
if ( $ENV{PATHEXT} ) {
        push @PATHEXT, split ';', $ENV{PATHEXT};
    } else {
        # Win9X or other: doesn't have PATHEXT, so needs hardcoded.
        push @PATHEXT, qw{.com .exe .bat};
    }
} elsif ( IS_VMS ) {
    push @PATHEXT, qw{.exe .com};
}

# Locate an executable by searching the directories of File::Spec->path
# (preceded by the current directory on DOS-ish systems, VMS and MacOS),
# trying every extension in @PATHEXT.
#
#   $exec - name of the program to look for.
#
# In scalar context returns the first match, or undef if there is none.
# In list context returns every match (an empty list if there is none).
# An undefined or empty $exec makes the function return undef at once, in
# either context.
sub which {
    my ($exec) = @_;

    # Test for "no name given" explicitly: a plain truth test would also
    # reject a program whose name is "0".
    return undef unless defined $exec and length $exec;

    my $all = wantarray;
    my @results = ();

    # check for aliases first
    if ( IS_VMS ) {
        my $symbol = `SHOW SYMBOL $exec`;
        chomp($symbol);
        unless ( $? ) {
            return $symbol unless $all;
            push @results, $symbol;
        }
    }
    if ( IS_MAC ) {
        # Aliases may not be set at all; do not split an undefined value.
        my @aliases = defined $ENV{Aliases} ? split( /\,/, $ENV{Aliases} ) : ();
        foreach my $alias ( @aliases ) {
            # This has not been tested!!
            # PPT which says MPW-Perl cannot resolve `Alias $alias`,
            # let's just hope it's fixed
            if ( lc($alias) eq lc($exec) ) {
                chomp(my $file = `Alias $alias`);
                last unless $file;  # if it failed, just go on the normal way
                return $file unless $all;
                push @results, $file;
                # we can stop this loop as if it finds more aliases matching,
                # it'll just be the same result anyway
                last;
            }
        }
    }

    my @path = File::Spec->path;
    if ( IS_DOS or IS_VMS or IS_MAC ) {
        unshift @path, File::Spec->curdir;
    }

    foreach my $base ( map { File::Spec->catfile($_, $exec) } @path ) {
        for my $ext ( @PATHEXT ) {
            my $file = $base.$ext;

            # We don't want dirs (as they are -x)
            next if -d $file;

            if (
                # Executable, normal case
                -x _
                or (
                    # MacOS doesn't mark as executable so we check -e
                    IS_MAC
                    ||
                    (
                        IS_DOS
                        and
                        grep {
                            # \Q...\E: the extension is literal text, so
                            # '.' must not match an arbitrary character.
                            $file =~ /\Q$_\E\z/i
                        } @PATHEXT[1..$#PATHEXT]
                    )
                    # DOSish systems don't pass -x on
                    # non-exe/bat/com files. so we check -e.
                    # However, we don't want to pass -e on files
                    # that aren't in PATHEXT, like README.
                    and -e _
                )
            ) {
                return $file unless $all;
                push @results, $file;
            }
        }
    }

    if ( $all ) {
        return @results;
    } else {
        return undef;
    }
}

# List-returning front end to which(): always collects every match.
sub where {
    # force wantarray
    my @res = which($_[0]);
    return @res;
}

1;

__END__

=pod

=head1 NAME

File::Which - Portable implementation of the `which' utility

=head1 SYNOPSIS

  use File::Which;                  # exports which()
  use File::Which qw(which where);  # exports which() and where()

  my $exe_path = which('perldoc');

  my @paths = where('perl');
  - Or -
  my @paths = which('perl'); # an array forces search for all of them

=head1 DESCRIPTION

C<File::Which> was created to be able to get the paths to executable programs
on systems under which the `which' program wasn't implemented in the shell.

C<File::Which> searches the directories of the user's C<PATH> (as returned by
C<File::Spec-E<gt>path()>), looking for executable files having the name
specified as a parameter to C<which()>. Under Win32 systems, which do not have a
notion of directly executable files, but uses special extensions such as C<.exe>
and C<.bat> to identify them, C<File::Which> takes extra steps to assure that
you will find the correct file (so for example, you might be searching for
C<perl>, it'll try F<perl.exe>, F<perl.bat>, etc.)

=head1 Steps Used on Win32, DOS, OS2 and VMS

=head2 Windows NT

Windows NT has a special environment variable called C<PATHEXT>, which is used
by the shell to look for executable files. Usually, it will contain a list in
the form C<.EXE;.BAT;.COM;.JS;.VBS> etc. If C<File::Which> finds such an
environment variable, it parses the list and uses it as the different
extensions.

=head2 Windows 9x and other ancient Win/DOS/OS2

This set of operating systems don't have the C<PATHEXT> variable, and usually
you will find executable files there with the extensions C<.exe>, C<.bat> and
(less likely) C<.com>. C<File::Which> uses this hardcoded list if it's running
under Win32 but does not find a C<PATHEXT> variable.

=head2 VMS

Same case as Windows 9x: uses C<.exe> and C<.com> (in that order).

=head1 Functions

=head2 which($short_exe_name)

Exported by default.
C<$short_exe_name> is the name used in the shell to call the program (for
example, C<perl>).

If it finds an executable with the name you specified, C<which()> will return
the absolute path leading to this executable (for example, F</usr/bin/perl> or
F<C:\Perl\Bin\perl.exe>).

If it does I<not> find the executable, it returns C<undef>.

If C<which()> is called in list context, it will return I<all> the
matches.

=head2 where($short_exe_name)

Not exported by default.

Same as C<which($short_exe_name)> in array context. Same as the
C<`where'> utility, will return an array containing all the path names
matching C<$short_exe_name>.

=head1 BUGS AND CAVEATS

Not tested on VMS or MacOS, although there is platform specific code
for those. Anyone who has a second would be very kind to send me a
report of how it went.

File::Spec adds the current directory to the front of PATH if on
Win32, VMS or MacOS. I have no knowledge of those so don't know if the
current directory is searched first or not. Could someone please tell me?

=head1 SUPPORT

Bugs should be reported via the CPAN bug tracker at

L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=File-Which>

For other issues, contact the maintainer.

=head1 AUTHOR

Adam Kennedy E<lt>adamk@cpan.orgE<gt>

Per Einar Ellefsen E<lt>pereinar@cpan.orgE<gt>

Originated in F<modperl-2.0/lib/Apache/Build.pm>. Changed for use in DocSet
(for the mod_perl site) and Win32-awareness by me, with slight modifications
by Stas Bekman, then extracted to create C<File::Which>.

Version 0.04 had some significant platform-related changes, taken from
the Perl Power Tools C<`which'> implementation by Abigail with
enhancements from Peter Prymmer. See
L<http://www.perl.com/language/ppt/src/which/index.html> for more
information.

=head1 COPYRIGHT

Copyright 2002 Per Einar Ellefsen.

Some parts copyright 2009 Adam Kennedy.

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 SEE ALSO

L<File::Spec>, L<which(1)>, Perl Power Tools:
L<http://www.perl.com/language/ppt/index.html>.
=cut Copy/Recursive.pm000064400000054021147634421560010001 0ustar00
package File::Copy::Recursive;

use strict;

BEGIN {
    # Keep older versions of Perl from trying to use lexical warnings
    $INC{'warnings.pm'} = "fake warnings entry for < 5.6 perl ($])" if $] < 5.006;
}
use warnings;

use Carp;
use File::Copy;
use File::Spec;    #not really needed because File::Copy already gets it, but for good measure :)

use vars qw(
    @ISA      @EXPORT_OK $VERSION  $MaxDepth $KeepMode
    $CPRFComp $CopyLink  $PFSCheck $RemvBase $NoFtlPth $ForcePth $CopyLoop
    $RMTrgFil $RMTrgDir  $CondCopy $BdTrgWrn $SkipFlop $DirPerms
);

require Exporter;
@ISA       = qw(Exporter);
@EXPORT_OK = qw(fcopy rcopy dircopy fmove rmove dirmove pathmk pathrm pathempty pathrmdir);
$VERSION   = '0.38';

# Package-level switches; callers are expected to override them with local().
$MaxDepth = 0;    # 0 = unlimited recursion depth in dircopy()
$KeepMode = 1;    # chmod each copy to the mode of its source
$CPRFComp = 0;    # true: dircopy() into an existing dir copies to "$new/<basename of $orig>"
$CopyLink = eval { local $SIG{'__DIE__'}; symlink '', ''; 1 } || 0;    # true when symlink() is implemented
$PFSCheck = 1;    # compare stat() device/inode to detect "source and target are the same"
$RemvBase = 0;    # *move(): also remove the directory path the source lived in
$NoFtlPth = 0;    # passed to pathmk()/pathrm() as their "do not return on failure" flag
$ForcePth = 0;    # passed to pathrm() as its "empty directories first" flag
$CopyLoop = 0;    # true: skip the "source contains target" check
$RMTrgFil = 0;    # fcopy(): 0 = keep target, 1 = unlink (carp on failure), other true = unlink (return on failure)
$RMTrgDir = 0;    # dircopy(): same scheme as $RMTrgFil, for the target directory
$CondCopy = {};   # consulted by $ok_todo_asper_condcopy (currently a placeholder)
$BdTrgWrn = 0;    # carp when copying a symlink whose target does not exist
$SkipFlop = 0;    # dircopy(): skip files that fail to copy instead of returning
$DirPerms = 0777; # mode handed to every mkdir()

# $samecheck->($orig, $new)
# Returns true when it is safe to copy $orig to $new.  Returns false (after a
# carp where noted) when the arguments are missing, are the same string, refer
# to the same device/inode pair, or when directory $orig contains $new.
# Always returns true on MSWin32, where the check is skipped.
my $samecheck = sub {
    return 1 if $^O eq 'MSWin32';    # need better way to check for this on winders...
    return if @_ != 2 || !defined $_[0] || !defined $_[1];
    return if $_[0] eq $_[1];

    my $one = '';
    if ($PFSCheck) {
        $one = join( '-', ( stat $_[0] )[ 0, 1 ] ) || '';
        my $two = join( '-', ( stat $_[1] )[ 0, 1 ] ) || '';
        if ( $one eq $two && $one ) {
            carp "$_[0] and $_[1] are identical";
            return;
        }
    }

    if ( -d $_[0] && !$CopyLoop ) {
        $one = join( '-', ( stat $_[0] )[ 0, 1 ] ) if !$one;
        my $abs = File::Spec->rel2abs( $_[1] );
        my @pth = File::Spec->splitdir($abs);

        # Walk from the target up to the root looking for the source directory.
        while (@pth) {
            my $cur = File::Spec->catdir(@pth);
            last if !$cur;    # probably not necessary, but nice to have just in case :)
            my $two = join( '-', ( stat $cur )[ 0, 1 ] ) || '';
            if ( $one eq $two && $one ) {
                # $! = 62; # Too many levels of symbolic links
                carp "Caught Deep Recursion Condition: $_[0] contains $_[1]";
                return;
            }
            pop @pth;
        }
    }

    return 1;
};

# $glob->(\&copier, $src_glob, @args)
# Runs the copier on every path glob() returns for $src_glob, with $CPRFComp
# forced on.  Each element of the returned list is a reference to a
# one-element array that holds an array ref of the copier's return values.
# NOTE(review): @call always has exactly one element, so the "or return" can
# never fire; kept as is because callers may depend on the return shape.
my $glob = sub {
    my ( $do, $src_glob, @args ) = @_;

    local $CPRFComp = 1;

    my @rt;
    for my $path ( glob($src_glob) ) {
        my @call = [ $do->( $path, @args ) ] or return;
        push @rt, \@call;
    }

    return @rt;
};

# $move->($is_file, $orig, $new [, $buf])
# Copies with fcopy() ($is_file true) or dircopy(), then removes the original
# and, when $RemvBase is set, the directory path the original lived in.
# Returns what the copy returned (its first value in scalar context).
my $move = sub {
    my $fl = shift;
    my @x;
    if ($fl) {
        @x = fcopy(@_) or return;
    } else {
        @x = dircopy(@_) or return;
    }
    if (@x) {
        if ($fl) {
            unlink $_[0] or return;
        } else {
            pathrmdir( $_[0] ) or return;
        }
        if ($RemvBase) {
            my ( $volm, $path ) = File::Spec->splitpath( $_[0] );
            pathrm( File::Spec->catpath( $volm, $path, '' ), $ForcePth, $NoFtlPth ) or return;
        }
    }
    return wantarray ? @x : $x[0];
};

# $ok_todo_asper_condcopy->($orig)
# Decides whether $orig should be copied according to $CondCopy.  The
# condition branches are empty placeholders, so this currently always
# returns 1.
my $ok_todo_asper_condcopy = sub {
    my $org  = shift;
    my $copy = 1;
    if ( exists $CondCopy->{$org} ) {
        if ( $CondCopy->{$org}{'md5'} ) {

        }
        if ($copy) {

        }
    }
    return $copy;
};

# fcopy($orig, $new [, $buf])
# Copies one file (or, when $CopyLink is true, recreates one symlink),
# creating the target's directory path if needed and preserving the mode when
# $KeepMode is set.  Returns 1 in scalar context and (1,0,0) in list context,
# or false on failure.
sub fcopy {
    $samecheck->(@_) or return;
    if ( $RMTrgFil && ( -d $_[1] || -e $_[1] ) ) {
        my $trg = $_[1];
        if ( -d $trg ) {
            my @trgx = File::Spec->splitpath( $_[0] );
            $trg = File::Spec->catfile( $_[1], $trgx[$#trgx] );
        }
        $samecheck->( $_[0], $trg ) or return;
        if ( -e $trg ) {
            if ( $RMTrgFil == 1 ) {
                unlink $trg or carp "\$RMTrgFil failed: $!";
            } else {
                unlink $trg or return;
            }
        }
    }
    my ( $volm, $path ) = File::Spec->splitpath( $_[1] );
    if ( $path && !-d $path ) {
        pathmk( File::Spec->catpath( $volm, $path, '' ), $NoFtlPth );
    }
    if ( -l $_[0] && $CopyLink ) {
        carp "Copying a symlink ($_[0]) whose target does not exist"
            if !-e readlink( $_[0] ) && $BdTrgWrn;
        symlink readlink( shift() ), shift() or return;
    } else {
        copy(@_) or return;

        my @base_file = File::Spec->splitpath( $_[0] );
        my $mode_trg  = -d $_[1] ? File::Spec->catfile( $_[1], $base_file[$#base_file] ) : $_[1];

        chmod scalar( ( stat( $_[0] ) )[2] ), $mode_trg if $KeepMode;
    }
    return wantarray ? ( 1, 0, 0 ) : 1;    # use 0's incase they do math on them and in case rcopy() is called in list context = no uninit val warnings
}

# rcopy($orig, $new [, $buf])
# Dispatches to fcopy() for files and (when $CopyLink is true) symlinks, and
# to dircopy() for directories or for a source ending in '*'.
sub rcopy {
    if ( -l $_[0] && $CopyLink ) {
        goto &fcopy;
    }

    goto &dircopy if -d $_[0] || substr( $_[0], ( 1 * -1 ), 1 ) eq '*';
    goto &fcopy;
}

# rcopy_glob($src_glob, $new [, $buf])
# rcopy() for every path matched by $src_glob; see $glob for the return shape.
sub rcopy_glob {
    $glob->( \&rcopy, @_ );
}

# dircopy($orig, $new [, $buf])
# Recursively copies directory $orig into $new.  A trailing '*' on $orig
# copies the directory's contents even when $CPRFComp is set.  Returns the
# number of files and directories copied in scalar context, and (that count,
# number of directories, depth traversed) in list context; false on failure.
sub dircopy {
    if ( $RMTrgDir && -d $_[1] ) {
        if ( $RMTrgDir == 1 ) {
            pathrmdir( $_[1] ) or carp "\$RMTrgDir failed: $!";
        } else {
            pathrmdir( $_[1] ) or return;
        }
    }
    my $globstar = 0;
    my $_zero    = $_[0];
    my $_one     = $_[1];
    if ( substr( $_zero, ( 1 * -1 ), 1 ) eq '*' ) {
        $globstar = 1;
        $_zero = substr( $_zero, 0, ( length($_zero) - 1 ) );
    }

    $samecheck->( $_zero, $_[1] ) or return;
    if ( !-d $_zero || ( -e $_[1] && !-d $_[1] ) ) {
        $! = 20;
        return;
    }

    if ( !-d $_[1] ) {
        pathmk( $_[1], $NoFtlPth ) or return;
    } else {
        if ( $CPRFComp && !$globstar ) {
            my @parts = File::Spec->splitdir($_zero);
            while ( $parts[$#parts] eq '' ) { pop @parts; }
            $_one = File::Spec->catdir( $_[1], $parts[$#parts] );
        }
    }
    my $baseend = $_one;
    my $level   = 0;
    my $filen   = 0;
    my $dirn    = 0;

    my $recurs;    #must be my()ed before sub {} since it calls itself
    $recurs = sub {
        my ( $str, $end, $buf ) = @_;

        # The top-level directory counts once as a file and once as a directory.
        $filen++ if $end eq $baseend;
        $dirn++  if $end eq $baseend;

        $DirPerms = oct($DirPerms) if substr( $DirPerms, 0, 1 ) eq '0';
        mkdir( $end, $DirPerms ) or return if !-d $end;
        chmod scalar( ( stat($str) )[2] ), $end if $KeepMode;
        if ( $MaxDepth && $MaxDepth =~ m/^\d+$/ && $level >= $MaxDepth ) {
            return ( $filen, $dirn, $level ) if wantarray;
            return $filen;
        }
        $level++;

        my @files;
        if ( $] < 5.006 ) {
            opendir( STR_DH, $str ) or return;
            @files = grep( $_ ne '.' && $_ ne '..', readdir(STR_DH) );
            closedir STR_DH;
        } else {
            opendir( my $str_dh, $str ) or return;
            @files = grep( $_ ne '.' && $_ ne '..', readdir($str_dh) );
            closedir $str_dh;
        }

        for my $file (@files) {
            my ($file_ut) = $file =~ m{ (.*) }xms;    # untaint the directory entry
            my $org = File::Spec->catfile( $str, $file_ut );
            my $new = File::Spec->catfile( $end, $file_ut );
            if ( -l $org && $CopyLink ) {
                carp "Copying a symlink ($org) whose target does not exist"
                    if !-e readlink($org) && $BdTrgWrn;
                symlink readlink($org), $new or return;
            } elsif ( -d $org ) {
                $recurs->( $org, $new, $buf ) if defined $buf;
                $recurs->( $org, $new ) if !defined $buf;
                $filen++;
                $dirn++;
            } else {
                if ( $ok_todo_asper_condcopy->($org) ) {
                    if ($SkipFlop) {
                        fcopy( $org, $new, $buf ) or next if defined $buf;
                        fcopy( $org, $new ) or next if !defined $buf;
                    } else {
                        fcopy( $org, $new, $buf ) or return if defined $buf;
                        fcopy( $org, $new ) or return if !defined $buf;
                    }
                    chmod scalar( ( stat($org) )[2] ), $new if $KeepMode;
                    $filen++;
                }
            }
        }
        1;
    };

    my $ok = $recurs->( $_zero, $_one, $_[2] );
    undef $recurs;    # the closure refers to itself; drop it so it can be freed
    return if !$ok;
    return wantarray ? ( $filen, $dirn, $level ) : $filen;
}

# fmove($orig, $new [, $buf]) - fcopy() then remove the original.
sub fmove { $move->( 1, @_ ) }

# rmove($orig, $new [, $buf])
# Like rcopy(), but dispatches to fmove() or dirmove().
sub rmove {
    if ( -l $_[0] && $CopyLink ) {
        goto &fmove;
    }

    goto &dirmove if -d $_[0] || substr( $_[0], ( 1 * -1 ), 1 ) eq '*';
    goto &fmove;
}

# rmove_glob($src_glob, $new [, $buf])
# rmove() for every path matched by $src_glob; see $glob for the return shape.
sub rmove_glob {
    $glob->( \&rmove, @_ );
}

# dirmove($orig, $new [, $buf]) - dircopy() then remove the original.
sub dirmove { $move->( 0, @_ ) }

# pathmk($path [, $nofatal])
# Creates every missing directory along $path using mode $DirPerms.
# Returns 1; returns false at the first failed mkdir() unless $nofatal is true.
sub pathmk {
    my @parts   = File::Spec->splitdir( shift() );
    my $nofatal = shift;
    my $pth     = $parts[0];
    my $zer     = 0;
    if ( !$pth ) {
        # Empty first component: start from the first two components joined.
        $pth = File::Spec->catdir( $parts[0], $parts[1] );
        $zer = 1;
    }
    for ( $zer .. $#parts ) {
        $DirPerms = oct($DirPerms) if substr( $DirPerms, 0, 1 ) eq '0';
        mkdir( $pth, $DirPerms ) or return if !-d $pth && !$nofatal;
        mkdir( $pth, $DirPerms ) if !-d $pth && $nofatal;
        $pth = File::Spec->catdir( $pth, $parts[ $_ + 1 ] ) unless $_ == $#parts;
    }
    1;
}

# pathempty($dir)
# Removes everything inside $dir, recursing into sub-directories through
# pathrmdir().  Returns 2 if $dir is not a directory, 1 on success and false
# on the first failure.
sub pathempty {
    my $pth = shift;

    return 2 if !-d $pth;

    # Only '.' and '..' are skipped; an entry such as '...' is a real name and
    # must be removed too.  The handle is closed before anything is deleted so
    # that no early return can leave it open.
    my @names;
    if ( $] < 5.006 ) {
        opendir( PTH_DH, $pth ) or return;
        @names = grep { $_ ne '.' && $_ ne '..' } readdir(PTH_DH);
        closedir PTH_DH;
    } else {
        opendir( my $pth_dh, $pth ) or return;
        @names = grep { $_ ne '.' && $_ ne '..' } readdir($pth_dh);
        closedir $pth_dh;
    }

    for my $name (@names) {
        my ($name_ut) = $name =~ m{ (.*) }xms;    # untaint the directory entry
        my $flpth = File::Spec->catdir( $pth, $name_ut );

        if ( -l $flpth ) {
            unlink $flpth or return;
        } elsif ( -d $flpth ) {
            pathrmdir($flpth) or return;
        } else {
            unlink $flpth or return;
        }
    }

    1;
}

# pathrm($path [, $force [, $nofatal]])
# rmdir()s $path and then each of its parent components in turn.  With $force
# each directory is emptied with pathempty() first.  With $nofatal failures
# are ignored for every component instead of ending the call.  Returns 2 if
# $path is not a directory, 1 otherwise (false on a fatal failure).
sub pathrm {
    my ( $path, $force, $nofatal ) = @_;
    return 2 if !-d $path;
    my @pth = File::Spec->splitdir($path);

    while (@pth) {
        my $cur = File::Spec->catdir(@pth);
        last if !$cur;    # necessary ???
        if ( !$nofatal ) {
            pathempty($cur) or return if $force;
            rmdir $cur or return;
        } else {
            pathempty($cur) if $force;
            rmdir $cur;
        }
        pop @pth;
    }
    1;
}

# pathrmdir($dir)
# Empties $dir with pathempty() and then removes it.  Returns 2 if $dir does
# not exist, false if it exists but is not a directory or cannot be removed,
# and the result of rmdir() otherwise.
sub pathrmdir {
    my $dir = shift;
    if ( -e $dir ) {
        return if !-d $dir;
    } else {
        return 2;
    }

    pathempty($dir) or return;

    rmdir $dir or return;
}

1;
__END__

=head1 NAME

File::Copy::Recursive - Perl extension for recursively copying files and directories

=head1 SYNOPSIS

    use File::Copy::Recursive qw(fcopy rcopy dircopy fmove rmove dirmove);

    fcopy($orig,$new[,$buf]) or die $!;
    rcopy($orig,$new[,$buf]) or die $!;
    dircopy($orig,$new[,$buf]) or die $!;

    fmove($orig,$new[,$buf]) or die $!;
    rmove($orig,$new[,$buf]) or die $!;
    dirmove($orig,$new[,$buf]) or die $!;

    rcopy_glob("orig/stuff-*", $trg [, $buf]) or die $!;
    rmove_glob("orig/stuff-*", $trg [,$buf]) or die $!;

=head1 DESCRIPTION

This module copies and moves directories recursively (or single files, well... singly) to an optional depth and attempts to preserve each file or directory's mode.

=head1 EXPORT

None by default. But you can export all the functions as in the example above and the path* functions if you wish.

=head2 fcopy()

This function uses File::Copy's copy() function to copy a file but not a directory. Any directories are recursively created if need be.
One difference to File::Copy::copy() is that fcopy attempts to preserve the mode (see Preserving Mode below)
The optional $buf in the synopsis is the same as File::Copy::copy()'s 3rd argument
returns the same as File::Copy::copy() in scalar context and 1,0,0 in list context to accommodate rcopy()'s list context on regular files.
(See below for more info) =head2 dircopy() This function recursively traverses the $orig directory's structure and recursively copies it to the $new directory. $new is created if necessary (multiple nonexistent directories are ok (IE foo/bar/baz). The script logically and portably creates all of them if necessary). It attempts to preserve the mode (see Preserving Mode below) and by default it copies all the way down into the directory, (see Managing Depth) below. If a directory is not specified it fails (returns false with $! set) just like fcopy fails if it's not a file that is specified. Returns true or false; for true in scalar context it returns the number of files and directories copied. In list context it returns the number of files and directories, number of directories only, depth level traversed. my $num_of_files_and_dirs = dircopy($orig,$new); my($num_of_files_and_dirs,$num_of_dirs,$depth_traversed) = dircopy($orig,$new); Normally it stops and returns if a copy fails; to continue on regardless set $File::Copy::Recursive::SkipFlop to true. local $File::Copy::Recursive::SkipFlop = 1; That way it will copy everything it can in a directory and won't stop because of permissions, etc... =head2 rcopy() This function will allow you to specify a file *or* directory. It calls fcopy() if it's a file and dircopy() if it's a directory. If you call rcopy() (or fcopy() for that matter) on a file in list context, the values will be 1,0,0 since no directories and no depth are used. This is important because if it's a directory in list context and there is only the initial directory the return value is 1,1,1. =head2 rcopy_glob() This function lets you specify a pattern suitable for perl's glob() as the first argument. Subsequently each path returned by perl's glob() gets rcopy()ied. It returns an array whose items are array refs that contain the return value of each rcopy() call. It forces behavior as if $File::Copy::Recursive::CPRFComp is true. 
=head2 fmove() Copies the file then removes the original. You can manage the path the original file is in according to $RemvBase. =head2 dirmove() Uses dircopy() to copy the directory then removes the original. You can manage the path the original directory is in according to $RemvBase. =head2 rmove() Like rcopy() but calls fmove() or dirmove() instead. =head2 rmove_glob() Like rcopy_glob() but calls rmove() instead of rcopy() =head3 $RemvBase Default is false. When set to true the *move() functions will not only attempt to remove the original file or directory but will remove the given path it is in. So if you: rmove('foo/bar/baz', '/etc/'); # "baz" is removed from foo/bar after it is successfully copied to /etc/ local $File::Copy::Recursive::RemvBase = 1; rmove('foo/bar/baz','/etc/'); # if baz is successfully copied to /etc/ : # first "baz" is removed from foo/bar # then "foo/bar" is removed via pathrm() =head4 $ForcePth Default is false. When set to true it calls pathempty() before any directories are removed to empty the directory so it can be rmdir()'ed when $RemvBase is in effect. =head2 Creating and Removing Paths =head3 $NoFtlPth Default is false. If set to true rmdir(), mkdir(), and pathempty() calls in pathrm() and pathmk() do not return() on failure. If it's set to true they just silently go about their business regardless. This isn't a good idea but it's there if you want it. =head3 $DirPerms Mode to pass to any mkdir() calls. Defaults to 0777 as per umask()'s POD. Explicitly having this allows older perls to be able to use FCR and might add a bit of flexibility for you. Any value you set it to should be suitable for oct() =head3 Path functions These functions exist solely because they were necessary for the move and copy functions to have the features they do and not because they are of themselves the purpose of this module. 
That being said, here is how they work so you can understand how the copy and move functions work and use them by themselves if you wish. =head4 pathrm() Removes a given path recursively. It removes the *entire* path so be careful!!! Returns 2 if the given path is not a directory. File::Copy::Recursive::pathrm('foo/bar/baz') or die $!; # foo no longer exists Same as: rmdir 'foo/bar/baz' or die $!; rmdir 'foo/bar' or die $!; rmdir 'foo' or die $!; An optional second argument makes it call pathempty() before any rmdir()'s when set to true. File::Copy::Recursive::pathrm('foo/bar/baz', 1) or die $!; # foo no longer exists Same as: File::Copy::Recursive::pathempty('foo/bar/baz') or die $!; rmdir 'foo/bar/baz' or die $!; File::Copy::Recursive::pathempty('foo/bar/') or die $!; rmdir 'foo/bar' or die $!; File::Copy::Recursive::pathempty('foo/') or die $!; rmdir 'foo' or die $!; An optional third argument acts like $File::Copy::Recursive::NoFtlPth, again probably not a good idea. =head4 pathempty() Recursively removes the given directory's contents so it is empty. Returns 2 if argument is not a directory, 1 on successfully emptying the directory. File::Copy::Recursive::pathempty($pth) or die $!; # $pth is now an empty directory =head4 pathmk() Creates a given path recursively. Creates foo/bar/baz even if foo does not exist. File::Copy::Recursive::pathmk('foo/bar/baz') or die $!; An optional second argument if true acts just like $File::Copy::Recursive::NoFtlPth, which means you'd never get your die() if something went wrong. Again, probably a *bad* idea. =head4 pathrmdir() Same as rmdir() but it calls pathempty() first to recursively empty it first since rmdir can not remove a directory with contents. Just removes the top directory of the path given instead of the entire path like pathrm(). Returns 2 if the given argument does not exist (IE it's already gone). Returns false if it exists but is not a directory. 
=head2 Preserving Mode By default a quiet attempt is made to change the new file or directory to the mode of the old one. To turn this behavior off set $File::Copy::Recursive::KeepMode to false. =head2 Managing Depth You can set the maximum depth a directory structure is recursed by setting: $File::Copy::Recursive::MaxDepth to a whole number greater than 0. =head2 SymLinks If your system supports symlinks then symlinks will be copied as symlinks instead of as the target file. Perl's symlink() is used instead of File::Copy's copy(). You can customize this behavior by setting $File::Copy::Recursive::CopyLink to a true or false value. It is already set to true or false depending on your system's support of symlinks so you can check it with an if statement to see how it will behave: if($File::Copy::Recursive::CopyLink) { print "Symlinks will be preserved\n"; } else { print "Symlinks will not be preserved because your system does not support it\n"; } If symlinks are being copied you can set $File::Copy::Recursive::BdTrgWrn to true to make it carp when it copies a link whose target does not exist. It's false by default. local $File::Copy::Recursive::BdTrgWrn = 1; =head2 Removing existing target file or directory before copying. This can be done by setting $File::Copy::Recursive::RMTrgFil or $File::Copy::Recursive::RMTrgDir for file or directory behavior respectively. 0 = off (This is the default) 1 = carp() $! 
if removal fails 2 = return if removal fails local $File::Copy::Recursive::RMTrgFil = 1; fcopy($orig, $target) or die $!; # if it fails it does warn() and keeps going local $File::Copy::Recursive::RMTrgDir = 2; dircopy($orig, $target) or die $!; # if it fails it does your "or die" This should be unnecessary most of the time but it's there if you need it :) =head2 Turning off stat() check By default the files or directories are checked to see if they are the same (IE linked, or two paths (absolute/relative or different relative paths) to the same file) by comparing the file's stat() info. It's a very efficient check that carps (and makes the copy return false) if they are and shouldn't be turned off but if you must for some weird reason just set $File::Copy::Recursive::PFSCheck to a false value. ("PFS" stands for "Physical File System") =head2 Emulating cp -rf dir1/ dir2/ By default dircopy($dir1,$dir2) will put $dir1's contents right into $dir2 whether $dir2 exists or not. You can make dircopy() emulate cp -rf by setting $File::Copy::Recursive::CPRFComp to true. NOTE: This only emulates -f in the sense that it does not prompt. It does not remove the target file or directory if it exists. If you need to do that then use the variables $RMTrgFil and $RMTrgDir described in "Removing existing target file or directory before copying" above. That means that if $dir2 exists it puts the contents into $dir2/$dir1 instead of $dir2 just like cp -rf. 
If $dir2 does not exist then the contents go into $dir2 like normal (also like cp -rf). So assuming 'foo/file': dircopy('foo', 'bar') or die $!; # if bar does not exist the result is bar/file # if bar does exist the result is bar/file $File::Copy::Recursive::CPRFComp = 1; dircopy('foo', 'bar') or die $!; # if bar does not exist the result is bar/file # if bar does exist the result is bar/foo/file You can also specify a star for cp -rf glob type behavior: dircopy('foo/*', 'bar') or die $!; # if bar does not exist the result is bar/file # if bar does exist the result is bar/file $File::Copy::Recursive::CPRFComp = 1; dircopy('foo/*', 'bar') or die $!; # if bar does not exist the result is bar/file # if bar does exist the result is bar/file NOTE: The '*' is only like cp -rf foo/* and *DOES NOT EXPAND PARTIAL DIRECTORY NAMES LIKE YOUR SHELL DOES* (IE not like cp -rf fo* to copy foo/*) =head2 Allowing Copy Loops If you want to allow: cp -rf . foo/ type behavior set $File::Copy::Recursive::CopyLoop to true. This is false by default so that a check is done to see if the source directory will contain the target directory; if it does, the copy carps and returns false to avoid this problem. If you ever find a situation where $CopyLoop = 1 is desirable let me know (IE it's a bad bad idea but is there if you want it) (Note: On Windows this was necessary since it uses stat() to determine sameness and stat() is essentially useless for this on Windows. The test is now simply skipped on Windows but I'd rather have an actual reliable check if anyone in Microsoft land would care to share) =head1 SEE ALSO L<File::Copy> L<File::Spec> =head1 TO DO I am currently working on and reviewing some other modules to use in the new interface so we can lose the horrid globals as well as some other undesirable traits and also more easily make available some long standing requests. Tests will be easier to do with the new interface and hence the testing focus will shift to the new interface and aim to be comprehensive. 
The old interface will work, it just won't be brought in until it is used, so it will add no overhead for users of the new interface. I'll add this after the latest version has been out for a while with no new features or issues found :)

=head1 AUTHOR

Daniel Muey, L

=head1 COPYRIGHT AND LICENSE

Copyright 2004 by Daniel Muey

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=cut Tail.pm000064400000061334147634421560006016 0ustar00
package File::Tail;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

@ISA = qw(Exporter);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
$VERSION = '0.99.3';


# Preloaded methods go here.

use FileHandle;
#use IO::Seekable; # does not define SEEK_SET in 5005.02
use File::stat;
use Carp;
use Time::HiRes qw ( time sleep ); #import hires microsecond timers

sub SEEK_SET () {0;}
sub SEEK_CUR () {1;}
sub SEEK_END () {2;}


# $obj->interval([$seconds])
# Gets or sets the current wait interval.  A new value is capped at
# {maxinterval}.
sub interval {
    my $object=shift @_;
    if (@_) {
        $object->{interval}=shift;
        $object->{interval}=$object->{maxinterval} if
            $object->{interval}>$object->{maxinterval};
    }
    $object->{interval};
}

# $obj->logit(@text)
# When debugging is on, prints the calling sub's name, the input file name
# and @text, wrapped in reverse-video escape codes.
sub logit {
    my $object=shift;
    my @call=caller(1);
    print # STDERR
#       time()." ".
        "\033[7m".
        $call[3]." ".$object->{"input"}." ".join("",@_).
        "\033[0m".
        "\n"
        if $object->debug;
}

# Get/set accessor for {adjustafter}.
sub adjustafter {
    my $self=shift;
    $self->{adjustafter}=shift if @_;
    return $self->{adjustafter};
}

# Get/set accessor for {debug}.
sub debug {
    my $self=shift;
    $self->{"debug"}=shift if @_;
    return $self->{"debug"};
}

# $obj->errmode([$mode])
# Returns the previous error mode and, when an argument is passed, sets a new
# one.  $mode may be a code ref, an array ref whose first element is a code
# ref, or a string (stored lower-cased; error() treats "return" and "warn"
# specially and dies for anything else).  Croaks on an array ref whose first
# element is not a code ref.
sub errmode {
    my($self, $mode) = @_;
    my($prev) = $self->{errormode};

    if (@_ >= 2) {
        ## Set the error mode.
        defined $mode or $mode = '';
        if (ref($mode) eq 'CODE') {
            $self->{errormode} = $mode;
        }
        elsif (ref($mode) eq 'ARRAY') {
            unless (ref($mode->[0]) eq 'CODE') {
                croak 'bad errmode: first item in list must be a code ref';
            }
            $self->{errormode} = $mode;
        }
        else {
            $self->{errormode} = lc $mode;
        }
    }
    $prev;
}

# $obj->errmsg([@parts])
# Returns the previously stored error message.  When message parts are passed
# they are joined and stored as the new message.  Called without arguments it
# is a pure getter and leaves the stored message untouched.
sub errmsg {
    my($self, @errmsgs) = @_;
    my($prev) = $self->{errormsg};

    # @_ always contains the object itself, so a message is only present
    # from the second element on.
    if (@_ >= 2) {
        $self->{errormsg} = join '', @errmsgs;
    }

    $prev;
} # end sub errmsg

# $obj->error([@parts])
# With message parts: stores the joined message and performs the action
# selected by the error mode (call the code ref, return, carp, or croak).
# Without arguments: returns true if an error message is currently stored.
sub error {
    my($self, @errmsg) = @_;
    my(
       $errmsg,
       $func,
       $mode,
       @args,
       );

    # @_ always contains the object itself; see errmsg().
    if (@_ >= 2) {
        ## Put error message in the object.
        $errmsg = join '', @errmsg;
        $self->{"errormsg"} = $errmsg;

        ## Do the error action as described by error mode.
        $mode = $self->{"errormode"};
        if (ref($mode) eq 'CODE') {
            &$mode($errmsg);
            return;
        }
        elsif (ref($mode) eq 'ARRAY') {
            ($func, @args) = @$mode;
            &$func(@args);
            return;
        }
        elsif ($mode eq "return") {
            return;
        }
        elsif ($mode eq "warn") {
            carp $errmsg;
        }
        else {  # die
            croak $errmsg;
        }
    }
    else {
        return $self->{"errormsg"} ne '';
    }
} # end sub error

# Plain get/set accessors: each returns the stored value and, when called
# with an argument, stores that argument first.

sub copy {
    my $self=shift;
    $self->{copy}=shift if @_;
    return $self->{copy};
}

sub tail {
    my $self=shift;
    $self->{"tail"}=shift if @_;
    return $self->{"tail"};
}

sub reset_tail {
    my $self=shift;
    $self->{reset_tail}=shift if @_;
    return $self->{reset_tail};
}

sub nowait {
    my $self=shift;
    $self->{nowait}=shift if @_;
    return $self->{nowait};
}

sub method {
    my $self=shift;
    $self->{method}=shift if @_;
    return $self->{method};
}

sub input {
    my $self=shift;
    $self->{input}=shift if @_;
    return $self->{input};
}

sub maxinterval {
    my $self=shift;
    $self->{maxinterval}=shift if @_;
    return $self->{maxinterval};
}

sub resetafter {
    my $self=shift;
    $self->{resetafter}=shift if @_;
    return $self->{resetafter};
}

sub ignore_nonexistant {
    my $self=shift;
    $self->{ignore_nonexistant}=shift if @_;
    return $self->{ignore_nonexistant};
}

# Get/set accessor for the callback stored in {name_changes_callback}.
sub name_changes {
    my $self=shift;
    $self->{name_changes_callback}=shift if @_;
    return $self->{name_changes_callback};
}

# tie() interface: only reading lines is supported.

sub TIEHANDLE {
    my $ref=new(@_);
}

sub READLINE {
    $_[0]->read();
}

sub PRINT {
  $_[0]->error("PRINT makes no sense in File::Tail");
}

sub PRINTF {
  $_[0]->error("PRINTF makes no sense in File::Tail");
}

sub READ {
  $_[0]->error("READ not implemented in File::Tail -- use READLINE (<HANDLE>) instead");
}

sub GETC {
  $_[0]->error("GETC not (yet) implemented in File::Tail -- use READLINE (<HANDLE>) instead");
}

# Closes the underlying file handle, if there is one.
sub DESTROY {
  my($this) = $_[0];
  close($this->{"handle"}) if (defined($this) && defined($this->{'handle'}));
#  undef $_[0];
  return;
}

sub CLOSE {
    &DESTROY(@_);
}

# File::Tail->new($filename)
# File::Tail->new(name => $filename, %options)
# Builds the object, fills in defaults for every option that was not given
# and, for method "tail", opens the file through reset_pointers().  Croaks on
# an odd number of parameters or a missing file name.
sub new {
    my ($pkg)=shift @_;
    $pkg=ref($pkg) || $pkg;
    unless ($pkg) {
        $pkg="File::Tail";
    }
    my %params;
    if ($#_ == 0)  {
        $params{"name"}=$_[0];
    } else {
        if (($#_ % 2) != 1) {
            croak "Odd number of parameters for new";
        }
        %params=@_;
    }
    my $object = {};
    bless $object,$pkg;
    unless (defined($params{'name'})) {
        croak "No file name given. Pass filename as \"name\" parameter";
    }
    $object->input($params{'name'});
    $object->copy($params{'cname'});
    $object->method($params{'method'} || "tail");
    $object->{buffer}="";
    $object->maxinterval($params{'maxinterval'} || 60);
    $object->interval($params{'interval'} || 10);
    $object->adjustafter($params{'adjustafter'} || 10);
    $object->errmode($params{'errmode'} || "die");
    $object->resetafter($params{'resetafter'} ||
                         ($object->maxinterval*$object->adjustafter));
    $object->{"debug"}=($params{'debug'} || 0);
    $object->{"tail"}=($params{'tail'} || 0);
    $object->{"nowait"}=($params{'nowait'} || 0);
    $object->{"maxbuf"}=($params{'maxbuf'} || 16384);
    $object->{"name_changes_callback"}=($params{'name_changes'} || undef);
    if (defined $params{'reset_tail'}) {
        $object->{"reset_tail"} = $params{'reset_tail'};
    } else {
        $object->{"reset_tail"} = -1;
    }
    $object->{'ignore_nonexistant'}=($params{'ignore_nonexistant'} || 0);
    $object->{"lastread"}=0;
    $object->{"sleepcount"}=0;
    $object->{"lastcheck"}=0;
    $object->{"lastreset"}=0;
    $object->{"nextcheck"}=time();
    if ($object->{"method"} eq "tail") {
        $object->reset_pointers;
    }
#    $object->{curpos}=0;   # ADDED 25May01: undef warnings when
#    $object->{endpos}=0;   #   starting up on a nonexistant file
    return $object;
}

# Sets position in file when first opened or after that when reset:
# Sets {endpos} and {curpos} for current {handle} based on {tail}.
# Sets {tail} to value of {reset_tail}; effect is that first call
# uses {tail} and subsequent calls use {reset_tail}.
sub position {
    my $object=shift;
    $object->{"endpos"}=sysseek($object->{handle},0,SEEK_END);
    unless ($object->{"tail"}) {
        # tail == 0: start at the end of the file
        $object->{endpos}=$object->{curpos}=
            sysseek($object->{handle},0,SEEK_END);
    } elsif ($object->{"tail"}<0) {
        # negative tail: start at the beginning of the file
        $object->{endpos}=sysseek($object->{handle},0,SEEK_END);
        $object->{curpos}=sysseek($object->{handle},0,SEEK_SET);
    } else {
        # positive tail: read backwards from the end in growing chunks until
        # the buffer holds more than {tail} newlines or the whole file
        my $crs=0;
        my $maxlen=sysseek($object->{handle},0,SEEK_END);
        while ($crs<$object->{"tail"}+1) {
            my $avlen=length($object->{"buffer"})/($crs+1);
            $avlen=80 unless $avlen;
            my $calclen=$avlen*$object->{"tail"};
            $calclen+=1024 if $calclen<=length($object->{"buffer"});
            $calclen=$maxlen if $calclen>$maxlen;
            $object->{curpos}=sysseek($object->{handle},-$calclen,SEEK_END);
            sysread($object->{handle},$object->{"buffer"},
                    $calclen);
            $object->{curpos}=sysseek($object->{handle},0,SEEK_CUR);
            $crs=$object->{"buffer"}=~tr/\n//;
            last if ($calclen>=$maxlen);
        }
        $object->{curpos}=sysseek($object->{handle},0,SEEK_CUR);
        $object->{endpos}=sysseek($object->{handle},0,SEEK_END);
        if ($crs>$object->{"tail"}) {
            # drop the surplus lines from the front of the buffer
            my $toskip=$crs-$object->{"tail"};
            my $pos;
            $pos=index($object->{"buffer"},"\n");
            while (--$toskip) {
                $pos=index($object->{"buffer"},"\n",$pos+1);
            }
            $object->{"buffer"}=substr($object->{"buffer"},$pos+1);
        }
    }
    $object->{"tail"}=$object->{"reset_tail"};
}

# Tries to open or reopen the file; failure is an error unless
# {ignore_nonexistant} is set.
#
# For a new file (ie, first time opened) just does some book-keeping
# and calls position for initial position setup.  Otherwise does some
# checks whether file has been replaced, and if so changes to the new
# file.  (Calls position for reset setup).
#
# Always updates {lastreset} to current time.
#
sub reset_pointers {
    my $object=shift @_;
    $object->{lastreset} = time();

    my $st;

    my $oldhandle=$object->{handle};
    my $newhandle=FileHandle->new;

    my $newname;
    if ($oldhandle && $$object{'name_changes_callback'}) {
        $newname=$$object{'name_changes_callback'}();
    } else {
        $newname=$object->input;
    }

    # Three-argument open: the file name is never interpreted as a mode,
    # pipe or dup specification.
    unless (open($newhandle,'<',$newname)) {
        if ($object->{'ignore_nonexistant'}) {
            # If we have an oldhandle, leave endpos and curpos to what they
            # were, since oldhandle will still be the "current" handle elsewhere,
            # eg, checkpending.  This also allows tailing a file which is removed
            # but still being written to.
            if (!$oldhandle) {
                $object->{'endpos'}=0;
                $object->{'curpos'}=0;
            }
            return;
        }
        $object->error("Error opening ".$object->input.": $!");
        $object->{'endpos'}=0 unless defined($object->{'endpos'});
        $object->{'curpos'}=0 unless defined($object->{'curpos'});
        return;
    }
    binmode($newhandle);

    if (defined($oldhandle)) {
        # If file has not been changed since last OK read do not do anything
        $st=stat($newhandle);
        # lastread uses fractional time, stat doesn't. This can cause false
        # negatives.
        # If the file was changed the same second as it was last read,
        # we only reopen it if it's length has changed. The alternative is that
        # sometimes, files would be reopened needlessly, and with reset_tail
        # set to -1, we would see the whole file again.
        # Of course, if the file was removed the same second as when it was
        # last read, and replaced (within that second) with a file of equal
        # length, we're out of luck. I don't see how to fix this.
        if ($st->mtime<=int($object->{'lastread'})) {
            if ($st->size==$object->{"curpos"}) {
                $object->{lastread} = $st->mtime;
                return;
            } else {
                # will continue further to reset
            }
        } else {
        }
        $object->{handle}=$newhandle;
        $object->position;
        $object->{lastread} = $st->mtime;
        close($oldhandle);
    } else {                  # This is the first time we are opening this file
        $st=stat($newhandle);
        $object->{handle}=$newhandle;
        $object->position;
        $object->{lastread}=$st->mtime; # for better estimate on initial read
    }

}

# $obj->checkpending
# Opens the file if necessary, detects truncation (repositions) and long
# inactivity (reopens), reads any new data through readin() and returns the
# number of bytes between {curpos} and {endpos} afterwards.  Returns 0 when
# the file cannot be opened.
sub checkpending {
   my $object=shift @_;

   $object->{"lastcheck"}=time;
   unless ($object->{handle}) {
       $object->reset_pointers;
       unless ($object->{handle}) { # This try did not open the file either
           return 0;
       }
   }

   $object->{"endpos"}=sysseek($object->{handle},0,SEEK_END);
   if ($object->{"endpos"}<$object->{curpos}) {  # file was truncated
       $object->position;
   } elsif (($object->{curpos}==$object->{"endpos"})
             && (time()-$object->{lastread})>$object->{'resetafter'}) {
       $object->reset_pointers;
       $object->{"endpos"}=sysseek($object->{handle},0,SEEK_END);
   }

   if ($object->{"endpos"}-$object->{curpos}) {
       sysseek($object->{handle},$object->{curpos},SEEK_SET);
       readin($object,$object->{"endpos"}-$object->{curpos});
   }
   return ($object->{"endpos"}-$object->{curpos});
}

# $obj->predict
# Returns 0 when a complete line is available (or was just read), otherwise
# the number of seconds to wait before the next check.  After more than
# {adjustafter} consecutive idle checks the interval is multiplied by 10
# (interval() caps it at {maxinterval}).
sub predict {
    my $object=shift;
    my $crs=$object->{"buffer"}=~tr/\n//; # Count newlines in buffer
    return 0 if $crs;
    my $ttw=$object->{"nextcheck"}-time();
    return $ttw if $ttw>0;
    if (my $len=$object->checkpending) {
        readin($object,$len);
        return 0;
    }
    if ($object->{"sleepcount"}>$object->adjustafter) {
        $object->{"sleepcount"}=0;
        $object->interval($object->interval*10);
    }
    $object->{"sleepcount"}++;
    $object->{"nextcheck"}=time()+$object->interval;
    return ($object->interval);
}

# Debug helper: returns the bit string of a select() bitmap, or "undef".
sub bitprint {
    return "undef" unless defined($_[0]);
    return unpack("b*",$_[0]);
}

# File::Tail::select($rin, $win, $ein, $timeout, @tail_objects)
# May also be called as a method.  Combines the built-in select() on the
# three bitmaps with polling of the File::Tail objects.  In list context
# returns ($nfound, $timeleft, @objects_with_pending_lines); in scalar
# context returns $nfound.
sub select {
    # "my $x = ... if COND" has undefined behaviour, so declare first.
    my $object;
    $object=shift @_ if ref($_[0]);
    my ($timeout,@fds)=splice(@_,3);
    $object=$fds[0] unless defined($object);
    my ($savein,$saveout,$saveerr)=@_;
    my ($minpred,$mustreturn);
    if (defined($timeout)) {
        $minpred=$timeout;
        $mustreturn=time()+$timeout;
    } else {
        $minpred=$fds[0]->predict;
    }
    foreach (@fds) {
        my $val=$_->predict;
        $minpred=$val if $minpred>$val;
    }
    my ($nfound,$timeleft);
    my @retarr;
    while (defined($timeout)?(!$nfound && (time()<$mustreturn)):!$nfound) {
# Restore bitmaps in case we called select before
        splice(@_,0,3,$savein,$saveout,$saveerr);

        # The built-in select; a sub defined in this package does not
        # override it, CORE:: only makes that explicit.
        ($nfound,$timeleft)=CORE::select($_[0],$_[1],$_[2],$minpred);

        if (defined($timeout)) {
            $minpred=$timeout;
        } else {
            $minpred=$fds[0]->predict;
        }
        undef @retarr;
        foreach (@fds) {
            my $val=$_->predict;
            $nfound++ unless $val;
            $minpred=$val if $minpred>$val;
            push(@retarr,$_) unless $val;
        }
    }
    if (wantarray) {
        return ($nfound,$timeleft,@retarr);
    } else {
        return $nfound;
    }
}

# readin($object, $len)
# Appends up to $len bytes (capped at {maxbuf}) from the file to {buffer},
# updates {curpos} and, when complete lines arrived, re-estimates the wait
# interval from the time elapsed per line.  Returns the number of newlines
# in the buffer, or 0 when sysread() delivered nothing.
sub readin {
    my $crs;
    my ($object,$len)=@_;
    if (length($object->{"buffer"})) {
        # this means the file was reset AND a tail -n was active
        $crs=$object->{"buffer"}=~tr/\n//; # Count newlines in buffer
        return $crs if $crs;
    }
    $len=$object->{"maxbuf"} if ($len>$object->{"maxbuf"});
    my $nlen=$len;
    while ($nlen>0) {
        $len=sysread($object->{handle},$object->{"buffer"},
                     $nlen,length($object->{"buffer"}));
        return 0 if $len==0; # Some busy filesystems return 0 sometimes,
                             # and never give anything more from then on if
                             # you don't give them time to rest. This return
                             # allows File::Tail to use the usual exponential
                             # backoff.
        $nlen=$nlen-$len;
    }
    $object->{curpos}=sysseek($object->{handle},0,SEEK_CUR);

    $crs=$object->{"buffer"}=~tr/\n//;

    if ($crs) {
        my $tmp=time;
        $object->{lastread}=$tmp if $object->{lastread}>$tmp; #???
        $object->interval(($tmp-($object->{lastread}))/$crs);
        $object->{lastread}=$tmp;
    }
    return ($crs);
}

# $obj->read
# Blocks (sleeping between checks) until at least one complete line is
# available, then returns the next line in scalar context or every buffered
# line in list context.  With {nowait} set it returns "" (scalar) or the
# empty list instead of sleeping.
sub read {
    my $object=shift @_;
    my $pending=$object->{"endpos"}-$object->{"curpos"};
    my $crs=$object->{"buffer"}=~m/\n/;
    while (!$pending && !$crs) {
        $object->{"sleepcount"}=0;
        while ($object->predict) {
            if ($object->nowait) {
                if (wantarray) {
                    return ();
                } else {
                    return "";
                }
            }
            sleep($object->interval) if ($object->interval>0);
        }
        $pending=$object->{"endpos"}-$object->{"curpos"};
        $crs=$object->{"buffer"}=~m/\n/;
    }

    if (!length($object->{"buffer"}) || index($object->{"buffer"},"\n")<0) {
        readin($object,$pending);
    }
    unless (wantarray) {
        my $str=substr($object->{"buffer"},0,
                       1+index($object->{"buffer"},"\n"));
        $object->{"buffer"}=substr($object->{"buffer"},
                                   1+index($object->{"buffer"},"\n"));
        return $str;
    } else {
        my @str;
        while (index($object->{"buffer"},"\n")>-1) {
            push(@str,substr($object->{"buffer"},0,
                             1+index($object->{"buffer"},"\n")));
            $object->{"buffer"}=substr($object->{"buffer"},
                                       1+index($object->{"buffer"},"\n"));

        }
        return @str;
    }
}

1;

__END__

=head1 NAME

File::Tail - Perl extension for reading from continuously updated files

=head1 SYNOPSIS

    use File::Tail;
    $file=File::Tail->new("/some/log/file");
    while (defined($line=$file->read)) {
        print "$line";
    }

    use File::Tail;
    $file=File::Tail->new(name=>$name, maxinterval=>300, adjustafter=>7);
    while (defined($line=$file->read)) {
        print "$line";
    }

OR, you could use tie (additional parameters can be passed with the name, or can be set using $ref):

    use File::Tail;
    my $ref=tie *FH,"File::Tail",(name=>$name);
    while (<FH>) {
        print "$_";
    }

Note that the above script will never exit. If there is nothing being written to the file, it will simply block.

You can find more synopses in the file logwatch, which is included in the distribution.

Note: Select functionality was added in version 0.9, and it required some reworking of all routines. ***PLEASE*** let me know if you see anything strange happening.
You can find two ways of using select in the file select_demo which is included in the distribution. =head1 DESCRIPTION The primary purpose of File::Tail is reading and analysing log files while they are being written, which is especially useful if you are monitoring the logging process with a tool like Tobias Oetiker's MRTG. The module tries very hard NOT to "busy-wait" on a file that has little traffic. Any time it reads new data from the file, it counts the number of new lines, and divides the time that has passed since data were last read from the file by that number. That is considered the average time before new data will be written. When there is no new data to read, C<File::Tail> sleeps for that number of seconds. Thereafter, the waiting time is recomputed dynamically. Note that C<File::Tail> never sleeps for more than the number of seconds set by C<maxinterval>. If the file does not get altered for a while, C<File::Tail> gets suspicious and starts checking if the file was truncated, or moved and recreated. If anything like that had happened, C<File::Tail> will quietly reopen the file, and continue reading. The only way to affect what happens on reopen is by setting the reset_tail parameter (see below). The effect of this is that the scripts need not be aware when the logfiles were rotated, they will just quietly work on. Note that the sleep and time used are from Time::HiRes, so this module should do the right thing even if the time to sleep is less than one second. The logwatch script (also included) demonstrates several ways of calling the methods. =head1 CONSTRUCTOR =head2 new ([ ARGS ]) Creates a C<File::Tail>. If it has only one parameter, it is assumed to be the filename. If the open fails, the module performs a croak. I am currently looking for a way to set $! and return undef. You can pass several parameters to new: =over 4 =item name This is the name of the file to open. The file will be opened for reading. This must be a regular file, not a pipe or a terminal (i.e. it must be seekable).
=item maxinterval The maximum number of seconds (real number) that will be spent sleeping. Default is 60, meaning C<File::Tail> will never spend more than sixty seconds without checking the file. =item interval The initial number of seconds (real number) that will be spent sleeping, before the file is first checked. Default is ten seconds, meaning C<File::Tail> will sleep for 10 seconds and then determine how many new lines have appeared in the file. =item adjustafter The number of C<times> C<File::Tail> waits for the current interval, before adjusting the interval upwards. The default is 10. =item resetafter The number of seconds after last change when C<File::Tail> decides the file may have been closed and reopened. The default is adjustafter*maxinterval. =item maxbuf The maximum size of the internal buffer. When File::Tail suddenly found an enormous amount of information in the file (for instance if the retry parameters were set to very infrequent checking and the file was rotated), File::Tail sometimes slurped way too much file into memory. This sets the maximum size of File::Tail's buffer. Default value is 16384 (bytes). A large internal buffer may result in worse performance (as well as increased memory usage), since File::Tail will have to do more work processing the internal buffer. =item nowait Does not block on read, but returns an empty string (or an empty list in list context) if there is nothing to read. DO NOT USE THIS unless you know what you are doing. If you are using it in a loop, you probably DON'T know what you are doing. If you want to read tails from multiple files, use select. =item ignore_nonexistant Do not complain if the file doesn't exist when it is first opened or when it is to be reopened. (File may be reopened after resetafter seconds have passed since last data was found.) =item tail When first started, read and return C<n> lines from the file. If C<n> is zero, start at the end of file. If C<n> is negative, return the whole file. Default is C<0>. =item reset_tail Same as tail, but applies after reset. (i.e.
after the file has been automatically closed and reopened). Defaults to C<-1>, i.e. does not skip any information present in the file when it first checks it. Why would you want it otherwise? I've seen files which have been cycled like this: grep -v lastmonth log >newlog mv log archive/lastmonth mv newlog log kill -HUP logger Obviously, if this happens and you have reset_tail set to C<-1>, you will suddenly get a whole bunch of lines - lines you already saw. So in this case, reset_tail should probably be set to a small positive number or even C<0>. =item name_changes Some logging systems change the name of the file they are writing to, sometimes to include a date, sometimes a sequence number, sometimes other, even more bizarre changes. Instead of trying to implement various clever detection methods, File::Tail will call the code reference defined in name_changes. The code reference should return the string which is the new name of the file to try opening. Note that if the file does not exist, File::Tail will report a fatal error (unless ignore_nonexistant has also been specified). =item debug Set to nonzero if you want to see more about the inner workings of File::Tail. Otherwise not useful. =item errmode Modeled after the methods from Net::Telnet, here you decide how the errors should be handled. The parameter can be a code reference which is called with the error string as a parameter, an array with a code reference as the first parameter and other parameters to be passed to handler subroutine, or one of the words: return - ignore any error (just put error message in errmsg). warn - output the error message but continue die - display error message and exit Default is die. =back =head1 METHODS =head2 read C<read> returns one line from the input file. If there are no lines ready, it blocks until there are. =head2 select C