eaiovnaovbqoebvqoeavibavo Gunzip.pm000064400000076244147634506620006410 0ustar00
package IO::Uncompress::Gunzip ;

require 5.006 ;

# for RFC1952

use strict ;
use warnings;
use bytes;

use IO::Uncompress::RawInflate 2.061 ;

use Compress::Raw::Zlib 2.061 () ;
use IO::Compress::Base::Common 2.061 qw(:Status );
use IO::Compress::Gzip::Constants 2.061 ;
use IO::Compress::Zlib::Extra 2.061 ;

require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GunzipError);

@ISA = qw( Exporter IO::Uncompress::RawInflate );
@EXPORT_OK = qw( $GunzipError gunzip );
%EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

$GunzipError = '';

$VERSION = '2.061';

# Constructor. Clears $GunzipError, builds the self-tied object and passes
# the remaining arguments on to _create (inherited); returns whatever
# _create returns.
sub new
{
    my $class = shift ;
    $GunzipError = '';
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$GunzipError);

    $obj->_create(undef, 0, @_);
}

# Functional ("one-shot") interface: gunzip $input => $output [, OPTS].
# Creates an object with no class name and delegates all arguments to _inf.
sub gunzip
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$GunzipError);
    return $obj->_inf(@_) ;
}

# Returns the gzip-specific option table: 'parseextra', parsed as a boolean
# (the trailing 0 is presumably the default value -- confirm against
# IO::Compress::Base::Common).
sub getExtraParams
{
    return ( 'parseextra' => [IO::Compress::Base::Common::Parse_boolean, 0] ) ;
}

# Adjusts the parsed parameters in $got; always returns 1.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # gunzip always needs crc32
    $got->setValue('crc32' => 1);

    return 1;
}

# Reads GZIP_ID_SIZE bytes and checks them against the gzip magic number.
# The bytes read are kept in HeaderPending. Returns the magic bytes on
# success, otherwise the result of HeaderError.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, GZIP_ID_SIZE);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              GZIP_MIN_HEADER_SIZE . " bytes")
        if length $magic != GZIP_ID_SIZE ;

    return $self->HeaderError("Bad Magic")
        if ! isGzipMagic($magic) ;

    *$self->{Type} = 'rfc1952';

    return $magic ;
}

# Parses the rest of the gzip header, given the magic bytes already read by
# ckMagic. Returns the value of _readGzipHeader.
sub readHeader
{
    my $self = shift;
    my $magic = shift;

    return $self->_readGzipHeader($magic);
}

# Decodes the trailer ($trailer: two little-endian 32-bit values, CRC32 then
# ISIZE) and records both in the Info hash. In Strict mode both values are
# verified against the uncompressed data. Returns STATUS_OK, or the result
# of TrailerError on a mismatch.
sub chkTrailer
{
    my $self = shift;
    my $trailer = shift;

    # Check CRC & ISIZE
    my ($CRC32, $ISIZE) = unpack("V V", $trailer) ;
    *$self->{Info}{CRC32} = $CRC32;
    *$self->{Info}{ISIZE} = $ISIZE;

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32 != *$self->{Uncomp}->crc32() ;

        my $exp_isize = *$self->{UnCompSize}->get32bit();
        return $self->TrailerError("ISIZE mismatch. Got $ISIZE"
                                  . ", expected $exp_isize")
            if $ISIZE != $exp_isize ;
    }

    return STATUS_OK;
}

# Returns true if $buffer starts with the two gzip ID bytes, false (0) if
# the buffer is shorter than GZIP_ID_SIZE or the bytes differ.
sub isGzipMagic
{
    my $buffer = shift ;
    return 0 if length $buffer < GZIP_ID_SIZE ;
    my ($id1, $id2) = unpack("C C", $buffer) ;
    return $id1 == GZIP_ID1 && $id2 == GZIP_ID2 ;
}

# Reads and validates the magic bytes, then parses the rest of the header.
# When header parsing fails (undef status) the Transparent setting is
# removed from the object. Returns the status of _readGzipHeader or the
# result of HeaderError.
sub _readFullGzipHeader($)
{
    my ($self) = @_ ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, GZIP_ID_SIZE);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              GZIP_MIN_HEADER_SIZE . " bytes")
        if length $magic != GZIP_ID_SIZE ;

    return $self->HeaderError("Bad Magic")
        if ! isGzipMagic($magic) ;

    my $status = $self->_readGzipHeader($magic);
    delete *$self->{Transparent} if ! defined $status ;
    return $status ;
}

# Parses a gzip (RFC 1952) header. $magic holds the ID bytes that have
# already been consumed. Every header byte read is accumulated in $keep
# (also stored in HeaderPending after the fixed part) so that the header
# CRC can be checked and the raw header returned.
#
# Returns a hash reference describing the header on success; on failure
# returns the result of HeaderError / TruncatedHeader.
sub _readGzipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($HeaderCRC) ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, GZIP_MIN_HEADER_SIZE - GZIP_ID_SIZE)
        or return $self->HeaderError("Minimum header size is " .
                                     GZIP_MIN_HEADER_SIZE . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # now split out the various parts
    my ($cm, $flag, $mtime, $xfl, $os) = unpack("C C V C C", $buffer) ;

    $cm == GZIP_CM_DEFLATED
        or return $self->HeaderError("Not Deflate (CM is $cm)") ;

    # check for use of reserved bits
    return $self->HeaderError("Use of Reserved Bits in FLG field.")
        if $flag & GZIP_FLG_RESERVED ;

    my $EXTRA ;
    my @EXTRA = () ;
    if ($flag & GZIP_FLG_FEXTRA) {
        $EXTRA = "" ;
        $self->smartReadExact(\$buffer, GZIP_FEXTRA_HEADER_SIZE)
            or return $self->TruncatedHeader("FEXTRA Length") ;

        my ($XLEN) = unpack("v", $buffer) ;
        $self->smartReadExact(\$EXTRA, $XLEN)
            or return $self->TruncatedHeader("FEXTRA Body");
        $keep .= $buffer . $EXTRA ;

        # The sub-field structure is only validated when ParseExtra is set.
        if ($XLEN && *$self->{'ParseExtra'}) {
            my $bad = IO::Compress::Zlib::Extra::parseRawExtra($EXTRA,
                                                               \@EXTRA, 1, 1);
            return $self->HeaderError($bad)
                if defined $bad;
        }
    }

    my $origname ;
    if ($flag & GZIP_FLG_FNAME) {
        $origname = "" ;
        # The name is NUL terminated, so read it one byte at a time.
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FNAME");
            last if $buffer eq GZIP_NULL_BYTE ;
            $origname .= $buffer
        }
        $keep .= $origname . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Name")
            if *$self->{Strict} && $origname =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
    }

    my $comment ;
    if ($flag & GZIP_FLG_FCOMMENT) {
        $comment = "";
        # The comment is NUL terminated as well.
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FCOMMENT");
            last if $buffer eq GZIP_NULL_BYTE ;
            $comment .= $buffer
        }
        $keep .= $comment . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Comment")
            if *$self->{Strict} && $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o ;
    }

    if ($flag & GZIP_FLG_FHCRC) {
        $self->smartReadExact(\$buffer, GZIP_FHCRC_SIZE)
            or return $self->TruncatedHeader("FHCRC");

        $HeaderCRC = unpack("v", $buffer) ;

        # RFC 1952: CRC16 is the two least-significant bytes of the CRC32
        # of all header bytes up to (not including) the CRC16 itself, so
        # the mask must keep 16 bits. The previous 0xFF mask kept only 8
        # bits and made Strict mode reject valid headers.
        my $crc16 = Compress::Raw::Zlib::crc32($keep) & 0xFFFF ;

        return $self->HeaderError("CRC16 mismatch.")
            if *$self->{Strict} && $crc16 != $HeaderCRC;

        $keep .= $buffer ;
    }

    # Assume compression method is deflated for xfl tests
    #if ($xfl) {
    #}

    *$self->{Type} = 'rfc1952';

    return {
        'Type'              => 'rfc1952',
        'FingerprintLength' => 2,
        'HeaderLength'      => length $keep,
        'TrailerLength'     => GZIP_TRAILER_SIZE,
        'Header'            => $keep,
        'isMinimalHeader'   => $keep eq GZIP_MINIMUM_HEADER ? 1 : 0,

        'MethodID'          => $cm,
        'MethodName'        => $cm == GZIP_CM_DEFLATED ? "Deflated" : "Unknown" ,
        'TextFlag'          => $flag & GZIP_FLG_FTEXT ? 1 : 0,
        'HeaderCRCFlag'     => $flag & GZIP_FLG_FHCRC ? 1 : 0,
        'NameFlag'          => $flag & GZIP_FLG_FNAME ? 1 : 0,
        'CommentFlag'       => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
        'ExtraFlag'         => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
        'Name'              => $origname,
        'Comment'           => $comment,
        'Time'              => $mtime,
        'OsID'              => $os,
        'OsName'            => defined $GZIP_OS_Names{$os}
                                 ? $GZIP_OS_Names{$os} : "Unknown",
        'HeaderCRC'         => $HeaderCRC,
        'Flags'             => $flag,
        'ExtraFlags'        => $xfl,
        'ExtraFieldRaw'     => $EXTRA,
        'ExtraField'        => [ @EXTRA ],

        #'CompSize'=> $compsize,
        #'CRC32'=> $CRC32,
        #'OrigSize'=> $ISIZE,
    }
}

1;

__END__

=head1 NAME

IO::Uncompress::Gunzip - Read RFC 1952 files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;

    my $status = gunzip $input => $output [,OPTS]
        or die "gunzip failed: $GunzipError\n";

    my $z = new IO::Uncompress::Gunzip $input [OPTS]
        or die "gunzip failed: $GunzipError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $GunzipError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
files/buffers that conform to RFC 1952.

For writing RFC 1952 files/buffers, see the companion module IO::Compress::Gzip.

=head1 Functional Interface

A top-level function, C<gunzip>, is provided to carry out
"one-shot" uncompression between buffers and/or files. For finer
control over the uncompression process, see the L</"OO Interface">
section.

    use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;

    gunzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
        or die "gunzip failed: $GunzipError\n";

The functional interface needs Perl5.005 or better.

=head2 gunzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]

C<gunzip> expects at least two parameters,
C<$input_filename_or_reference> and C<$output_filename_or_reference>.

=head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. 
=item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. 
=item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. 
Don't bother using C<trailingData> if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C<trailingData> by setting the C<InputLength> option. =back =head2 Examples To read the contents of the file C<file1.txt.gz> and write the uncompressed data to the file C<file1.txt>. use strict ; use warnings ; use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; my $input = "file1.txt.gz"; my $output = "file1.txt"; gunzip $input => $output or die "gunzip failed: $GunzipError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; use IO::File ; my $input = new IO::File "<file1.txt.gz" or die "Cannot open 'file1.txt.gz': $!\n" ; my $buffer ; gunzip $input => \$buffer or die "gunzip failed: $GunzipError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.gz" and store the uncompressed data in the same directory use strict ; use warnings ; use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; gunzip '</my/home/*.txt.gz>' => '<*.txt>' or die "gunzip failed: $GunzipError\n"; and if you want to uncompress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; for my $input ( glob "/my/home/*.txt.gz" ) { my $output = $input; $output =~ s/\.gz$// ; gunzip $input => $output or die "Error uncompressing '$input': $GunzipError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::Gunzip is shown below my $z = new IO::Uncompress::Gunzip $input [OPTS] or die "IO::Uncompress::Gunzip failed: $GunzipError\n"; Returns an C<IO::Uncompress::Gunzip> object on success and undef on failure. The variable C<$GunzipError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::Gunzip can be used exactly like an L<IO::File> filehandle. This means that all normal input file operations can be carried out with C<$z>. 
For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::Gunzip object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. 
If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::Gunzip will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. =over 5 =item 1 If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the header must match the crc16 value of the gzip header actually read. 
=item 2 If the gzip header contains a name field (FNAME) it consists solely of ISO 8859-1 characters. =item 3 If the gzip header contains a comment field (FCOMMENT) it consists solely of ISO 8859-1 characters plus line-feed. =item 4 If the gzip FEXTRA header field is present it must conform to the sub-field structure as defined in RFC 1952. =item 5 The CRC32 and ISIZE trailer fields must be present. =item 6 The value of the CRC32 field read must match the crc32 value of the uncompressed data actually contained in the gzip file. =item 7 The value of the ISIZE fields read must match the length of the uncompressed data actually read from the file. =back =item C<< ParseExtra => 0|1 >> If the gzip FEXTRA header field is present and this option is set, it will force the module to check that it conforms to the sub-field structure as defined in RFC 1952. If C<Strict> is on it will automatically enable this option. Defaults to 0. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C<BlockSize> option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C<Append> parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C<read> method and the previous one, is that this one will attempt to return I<exactly> C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. 
Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list of hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =over 5 =item Name The contents of the Name header field, if present. If no name is present, the value will be undef. Note this is different from a zero length name, which will return an empty string. =item Comment The contents of the Comment header field, if present. If no comment is present, the value will be undef. Note this is different from a zero length comment, which will return an empty string. =back =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C<seek> in this module does not provide true random access to a compressed file/buffer. 
It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C<seek>. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>. B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C<EXPR> is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C<fileno> will return the underlying file descriptor. Once the C<close> method is called C<fileno> will return C<undef>. If the C<$z> object is associated with a buffer, this method will return C<undef>. =head2 close $z->close() ; close $z ; Closes the input file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::Gunzip object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). 
The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::Gunzip object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::Gunzip at present. 
=over 5

=item :all

Imports C<gunzip> and C<$GunzipError>.
Same as doing this

    use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;

=back

=head1 EXAMPLES

=head2 Working with Net::FTP

See L

=head1 SEE ALSO

L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L

For RFC 1950, 1951 and 1952 see F, F and F

The I compression library was written by Jean-loup Gailly F and Mark Adler F.

The primary site for the I compression library is F.

The primary site for gzip is F.

=head1 AUTHOR

This module was written by Paul Marquess, F.

=head1 MODIFICATION HISTORY

See the Changes file.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2005-2013 Paul Marquess. All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

Bunzip2.pm000064400000054175147634506620006464 0ustar00
package IO::Uncompress::Bunzip2 ;

use strict ;
use warnings;
use bytes;

use IO::Compress::Base::Common 2.061 qw(:Status );

use IO::Uncompress::Base 2.061 ;
use IO::Uncompress::Adapter::Bunzip2 2.061 ;

require Exporter ;
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $Bunzip2Error);

$VERSION = '2.061';
$Bunzip2Error = '';

@ISA    = qw( Exporter IO::Uncompress::Base );
@EXPORT_OK = qw( $Bunzip2Error bunzip2 ) ;
#%EXPORT_TAGS = %IO::Uncompress::Base::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
#Exporter::export_ok_tags('all');

# Constructor. Builds the self-tied object and passes the remaining
# arguments on to _create (inherited); returns whatever _create returns.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$Bunzip2Error);

    $obj->_create(undef, 0, @_);
}

# Functional ("one-shot") interface: bunzip2 $input => $output [, OPTS].
# Creates an object with no class name and delegates all arguments to _inf.
sub bunzip2
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$Bunzip2Error);
    return $obj->_inf(@_);
}

# Returns the bzip2-specific option table: 'verbosity' and 'small', both
# parsed as booleans (the trailing 0 is presumably the default value --
# confirm against IO::Compress::Base::Common).
sub getExtraParams
{
    return (
            'verbosity' => [IO::Compress::Base::Common::Parse_boolean, 0],
            'small'     => [IO::Compress::Base::Common::Parse_boolean, 0],
        );
}

# No bzip2-specific parameter checks; always returns 1.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    return 1;
}

# Checks the magic bytes, records the header info in the Info slot and
# creates the bunzip2 uncompression object, stored in the Uncomp slot.
# Returns 1 on success; 0 when the magic check fails, undef when the
# header cannot be read, or the result of saveErrorString when the
# adapter object cannot be created.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my $magic = $self->ckMagic()
        or return 0;

    *$self->{Info} = $self->readHeader($magic)
        or return undef ;

    my $Small     = $got->getValue('small');
    my $Verbosity = $got->getValue('verbosity');

    my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Bunzip2::mkUncompObject(
                                                    $Small, $Verbosity);

    return $self->saveErrorString(undef, $errstr, $errno)
        if ! defined $obj;

    *$self->{Uncomp} = $obj;

    return 1;
}

# Reads the first 4 bytes and checks them against the bzip2 magic pattern.
# The bytes read are kept in HeaderPending. Returns the magic bytes on
# success, otherwise the result of HeaderError.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Header size is " .
                              4 . " bytes")
        if length $magic != 4;

    return $self->HeaderError("Bad Magic.")
        if ! isBzip2Magic($magic) ;

    *$self->{Type} = 'bzip2';
    return $magic;
}

# Builds the header-info hash for a bzip2 stream. The magic bytes are
# handed to pushBack (presumably so the decompressor sees them again as
# part of the stream -- confirm in IO::Uncompress::Base) and HeaderPending
# is cleared.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    $self->pushBack($magic);
    *$self->{HeaderPending} = '';

    return {
        'Type'              => 'bzip2',
        'FingerprintLength' => 4,
        'HeaderLength'      => 4,
        'TrailerLength'     => 0,
        # Was the single-quoted literal '$magic', which never interpolates
        # and so reported the six characters "$magic" rather than the
        # actual header bytes.
        'Header'            => $magic
        };
}

# bzip2 streams have no separate trailer to verify here.
sub chkTrailer
{
    return STATUS_OK;
}

# Returns true if $buffer is "BZh" followed by a single digit.
sub isBzip2Magic
{
    my $buffer = shift ;
    return $buffer =~ /^BZh\d$/;
}

1 ;

__END__

=head1 NAME

IO::Uncompress::Bunzip2 - Read bzip2 files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ;

    my $status = bunzip2 $input => $output [,OPTS]
        or die "bunzip2 failed: $Bunzip2Error\n";

    my $z = new IO::Uncompress::Bunzip2 $input [OPTS]
        or die "bunzip2 failed: $Bunzip2Error\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $Bunzip2Error ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
bzip2 files/buffers.
For writing bzip2 files/buffers, see the companion module IO::Compress::Bzip2. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; bunzip2 $input => $output [,OPTS] or die "bunzip2 failed: $Bunzip2Error\n"; The functional interface needs Perl5.005 or better. =head2 bunzip2 $input => $output [, OPTS] C expects at least two parameters, C<$input> and C<$output>. =head3 The C<$input> parameter The parameter, C<$input>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the C<$input> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the input data will be read from C<$$input>. =item An array reference If C<$input> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. If the fileglob does not match any files ... See L for more details. =back If the C<$input> parameter is any other type, C will be returned. =head3 The C<$output> parameter The parameter C<$output> is used to control the destination of the uncompressed data. This parameter can take one of these forms. 
=over 5 =item A filename If the C<$output> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output> is a scalar reference, the uncompressed data will be stored in C<$$output>. =item An Array Reference If C<$output> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output> is a string that is delimited by the characters "<" and ">" C<bunzip2> will assume that it is an I<output fileglob string>. The output is the list of files that match the fileglob. When C<$output> is a fileglob string, C<$input> must also be a fileglob string. Anything else is an error. =back If the C<$output> parameter is any other type, C<undef> will be returned. =head2 Notes When C<$input> maps to multiple compressed files/buffers and C<$output> is a single file/buffer, after uncompression C<$output> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C<bunzip2>, C<OPTS>, are the same as those used with the OO interface defined in the L</"Constructor Options"> section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C<bunzip2> that are filehandles. If C<AutoClose> is specified, and the value is true, it will result in all input and/or output filehandles being closed once C<bunzip2> has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C<binmode> before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> TODO =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0.
=item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C<trailingData> will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C<trailingData> will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C<trailingData> if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C<trailingData> by setting the C<InputLength> option. =back =head2 Examples To read the contents of the file C<file1.txt.bz2> and write the uncompressed data to the file C<file1.txt>. use strict ; use warnings ; use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; my $input = "file1.txt.bz2"; my $output = "file1.txt"; bunzip2 $input => $output or die "bunzip2 failed: $Bunzip2Error\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>.
use strict ; use warnings ; use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; use IO::File ; my $input = new IO::File "<file1.txt.bz2" or die "Cannot open 'file1.txt.bz2': $!\n" ; my $buffer ; bunzip2 $input => \$buffer or die "bunzip2 failed: $Bunzip2Error\n"; To uncompress all files in the directory "/my/home" that match "*.txt.bz2" and store the uncompressed data in the same directory use strict ; use warnings ; use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; bunzip2 '</my/home/*.txt.bz2>' => '</my/home/#1.txt>' or die "bunzip2 failed: $Bunzip2Error\n"; and if you want to uncompress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; for my $input ( glob "/my/home/*.txt.bz2" ) { my $output = $input; $output =~ s/\.bz2$// ; bunzip2 $input => $output or die "Error uncompressing '$input': $Bunzip2Error\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::Bunzip2 is shown below my $z = new IO::Uncompress::Bunzip2 $input [OPTS] or die "IO::Uncompress::Bunzip2 failed: $Bunzip2Error\n"; Returns an C<IO::Uncompress::Bunzip2> object on success and undef on failure. The variable C<$Bunzip2Error> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::Bunzip2 can be used exactly like an L<IO::File|IO::File> filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input.
=item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C<close> method is called or the IO::Uncompress::Bunzip2 object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I<primed> with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::Bunzip2 will read it in blocks of C<$num> bytes.
This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C<read> method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C<read> method. If set to 0, the contents of the output parameter of the C<read> method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option is a no-op. =item C<< Small => 0|1 >> When non-zero this option will make bzip2 use a decompression algorithm that uses less memory at the expense of increasing the amount of time taken for decompression. Default is 0. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C<BlockSize> option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C<Append> parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
The main difference between this form of the C<read> method and the previous one, is that this one will attempt to return I<exactly> C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list of hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer.
=head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>. B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C<EXPR> is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C<fileno> will return the underlying file descriptor. Once the C<close> method is called C<fileno> will return C<undef>. If the C<$z> object is associated with a buffer, this method will return C<undef>. =head2 close $z->close() ; close $z ; Closes the input file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::Bunzip2 object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C<close> method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C<close> explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C<AutoClose> option has been enabled when the IO::Uncompress::Bunzip2 object was created, and the object is associated with a file, the underlying file will also be closed.
=head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::Bunzip2 at present. =over 5 =item :all Imports C and C<$Bunzip2Error>. Same as doing this use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L The primary site for the bzip2 program is F. See the module L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2008 Paul Marquess. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. AnyUncompress.pm000064400000071373147634506620007740 0ustar00
package IO::Uncompress::AnyUncompress ;

use strict;
use warnings;
use bytes;

use IO::Compress::Base::Common  2.061 ();

use IO::Uncompress::Base  2.061 ;


require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyUncompressError);

$VERSION = '2.061';
$AnyUncompressError = '';

@ISA = qw( Exporter IO::Uncompress::Base );
@EXPORT_OK = qw( $AnyUncompressError anyuncompress ) ;
%EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# TODO - allow the user to pick a set of the three formats to allow
#        or just assume want to auto-detect any of the three formats.

# Load every format module on a best-effort basis.  Each "use" is wrapped in
# a string eval so that a module that is not installed is silently skipped;
# mkUncomp() later tests the relevant $VERSION to see what actually loaded.
BEGIN
{
    eval ' use IO::Uncompress::Adapter::Inflate 2.061 ;';
    eval ' use IO::Uncompress::Adapter::Bunzip2 2.061 ;';
    eval ' use IO::Uncompress::Adapter::LZO 2.061 ;';
    eval ' use IO::Uncompress::Adapter::Lzf 2.061 ;';
    eval ' use IO::Uncompress::Adapter::UnLzma 2.061 ;';
    eval ' use IO::Uncompress::Adapter::UnXz 2.061 ;';

    eval ' use IO::Uncompress::Bunzip2 2.061 ;';
    eval ' use IO::Uncompress::UnLzop 2.061 ;';
    eval ' use IO::Uncompress::Gunzip 2.061 ;';
    eval ' use IO::Uncompress::Inflate 2.061 ;';
    eval ' use IO::Uncompress::RawInflate 2.061 ;';
    eval ' use IO::Uncompress::Unzip 2.061 ;';
    eval ' use IO::Uncompress::UnLzf 2.061 ;';
    eval ' use IO::Uncompress::UnLzma 2.061 ;';
    eval ' use IO::Uncompress::UnXz 2.061 ;';
}

# OO constructor.  Returns whatever _create() returns; errors are reported
# through $AnyUncompressError.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$AnyUncompressError);
    $obj->_create(undef, 0, @_);
}

# Functional ("one-shot") interface: anyuncompress $input => $output [,OPTS]
sub anyuncompress
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$AnyUncompressError);
    return $obj->_inf(@_) ;
}

# Extra constructor options understood by this class, both boolean and both
# defaulting to 0: 'rawinflate' and 'unlzma'.
sub getExtraParams
{
    return ( 'rawinflate' => [IO::Compress::Base::Common::Parse_boolean,  0] ,
             'unlzma'     => [IO::Compress::Base::Common::Parse_boolean,  0] ) ;
}

# Force the checksum options on before the stream is opened.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # any always needs both crc32 and adler32
    $got->setValue('crc32' => 1);
    $got->setValue('adler32' => 1);

    return 1;
}

# Detect the compression format of the input and install the matching
# uncompression object in *$self->{Uncomp}.
#
# Formats are probed in a fixed order: the zlib family (Inflate, Gunzip,
# Unzip, preceded by RawInflate when the 'rawinflate' option is set), then
# lzma (only when the 'unlzma' option is set), xz, bzip2, lzop and lzf.
# A format is only probed if its module loaded (its $VERSION is defined).
#
# Returns 1 when a format was recognised, 0 when none matched, and undef (or
# the result of saveErrorString) when a header could not be read or an
# uncompression object could not be created.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my $magic ;

    # try zlib first
    if (defined $IO::Uncompress::RawInflate::VERSION )
    {
        my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject();

        return $self->saveErrorString(undef, $errstr, $errno)
            if ! defined $obj;

        *$self->{Uncomp} = $obj;

        my @possible = qw( Inflate Gunzip Unzip );
        unshift @possible, 'RawInflate'
            if $got->getValue('rawinflate');

        $magic = $self->ckMagic( @possible );

        if ($magic) {
            *$self->{Info} = $self->readHeader($magic)
                or return undef ;

            return 1;
        }
     }

    if (defined $IO::Uncompress::UnLzma::VERSION && $got->getValue('unlzma'))
    {
        my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::UnLzma::mkUncompObject();

        return $self->saveErrorString(undef, $errstr, $errno)
            if ! defined $obj;

        *$self->{Uncomp} = $obj;

        my @possible = qw( UnLzma );
        #unshift @possible, 'RawInflate'
        #    if $got->getValue('rawinflate');

        # Unlike the other branches, the value returned by the probe is
        # stored directly as the header info; readHeader() is not called.
        if ( *$self->{Info} = $self->ckMagic( @possible ))
        {
            return 1;
        }
     }

     if (defined $IO::Uncompress::UnXz::VERSION and
         $magic = $self->ckMagic('UnXz')) {
        *$self->{Info} = $self->readHeader($magic)
            or return undef ;

        my ($obj, $errstr, $errno) =
            IO::Uncompress::Adapter::UnXz::mkUncompObject();

        return $self->saveErrorString(undef, $errstr, $errno)
            if ! defined $obj;

        *$self->{Uncomp} = $obj;

        return 1;
     }

     if (defined $IO::Uncompress::Bunzip2::VERSION and
         $magic = $self->ckMagic('Bunzip2')) {
        *$self->{Info} = $self->readHeader($magic)
            or return undef ;

        my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        return $self->saveErrorString(undef, $errstr, $errno)
            if ! defined $obj;

        *$self->{Uncomp} = $obj;

        return 1;
     }

     if (defined $IO::Uncompress::UnLzop::VERSION and
            $magic = $self->ckMagic('UnLzop')) {

        *$self->{Info} = $self->readHeader($magic)
            or return undef ;

        my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::LZO::mkUncompObject();

        return $self->saveErrorString(undef, $errstr, $errno)
            if ! defined $obj;

        *$self->{Uncomp} = $obj;

        return 1;
     }

     if (defined $IO::Uncompress::UnLzf::VERSION and
            $magic = $self->ckMagic('UnLzf')) {

        *$self->{Info} = $self->readHeader($magic)
            or return undef ;

        my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Lzf::mkUncompObject();

        return $self->saveErrorString(undef, $errstr, $errno)
            if ! defined $obj;

        *$self->{Uncomp} = $obj;

        return 1;
     }

     return 0 ;
}

# Probe the input against each named format in turn.
#
# @names are class-name suffixes; each is expanded to "IO::Uncompress::$name".
# For every candidate the object is temporarily re-blessed into that class so
# that the class's own ckMagic() runs.
#
# On the first true result the object is left blessed into the matching class
# and that result is returned.  If nothing matches, the object is restored to
# its original class and undef is returned.
sub ckMagic
{
    my $self = shift;
    my @names = @_ ;

    my $keep = ref $self ;
    for my $class ( map { "IO::Uncompress::$_" } @names)
    {
        # Format modules are loaded best-effort in the BEGIN block.  A class
        # that failed to load has no ckMagic to dispatch to, so skip it
        # rather than die with the object blessed into that class.
        next unless $class->can('ckMagic');

        bless $self => $class;
        my $magic = $self->ckMagic();

        if ($magic)
        {
            # Deliberately stay blessed into the detected class.
            return $magic ;
        }

        # Not this format: return the consumed bytes to the input so the
        # next candidate sees the stream from the start.
        $self->pushBack(*$self->{HeaderPending})  ;
        *$self->{HeaderPending} = ''  ;
    }

    bless $self => $keep;
    return undef;
}

1 ;

__END__


=head1 NAME

IO::Uncompress::AnyUncompress - Uncompress gzip, zip, bzip2 or lzop file/buffer

=head1 SYNOPSIS

    use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ;

    my $status = anyuncompress $input => $output [,OPTS]
        or die "anyuncompress failed: $AnyUncompressError\n";

    my $z = new IO::Uncompress::AnyUncompress $input [OPTS]
        or die "anyuncompress failed: $AnyUncompressError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $AnyUncompressError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
seek($z, $position, $whence) binmode($z) fileno($z) eof($z) close($z) =head1 DESCRIPTION This module provides a Perl interface that allows the reading of files/buffers that have been compressed with a variety of compression libraries. The formats supported are: =over 5 =item RFC 1950 =item RFC 1951 (optionally) =item gzip (RFC 1952) =item zip =item bzip2 =item lzop =item lzf =item lzma =item xz =back The module will auto-detect which, if any, of the supported compression formats is being used. =head1 Functional Interface A top-level function, C<anyuncompress>, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L</"OO Interface"> section. use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; anyuncompress $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "anyuncompress failed: $AnyUncompressError\n"; The functional interface needs Perl5.005 or better. =head2 anyuncompress $input_filename_or_reference => $output_filename_or_reference [, OPTS] C<anyuncompress> expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the C<$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>.
=item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. 
=back If the C<$output_filename_or_reference> parameter is any other type, C<undef> will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C<anyuncompress>, C<OPTS>, are the same as those used with the OO interface defined in the L</"Constructor Options"> section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C<anyuncompress> that are filehandles. If C<AutoClose> is specified, and the value is true, it will result in all input and/or output filehandles being closed once C<anyuncompress> has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C<binmode> before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C<Append> is enabled, all uncompressed data will be appended to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C<Append> is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C<Append> is enabled, the filehandle will be positioned to the end of the file via a call to C<seek> before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C<Append> is specified, and set to true, it will I<append> all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending.
If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; my $input = "file1.txt.Compressed"; my $output = "file1.txt"; anyuncompress $input => $output or die "anyuncompress failed: $AnyUncompressError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. 
use strict ; use warnings ; use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; use IO::File ; my $input = new IO::File "<file1.txt.Compressed" or die "Cannot open 'file1.txt.Compressed': $!\n" ; my $buffer ; anyuncompress $input => \$buffer or die "anyuncompress failed: $AnyUncompressError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the uncompressed data in the same directory use strict ; use warnings ; use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; anyuncompress '</my/home/*.txt.Compressed>' => '<*.txt>' or die "anyuncompress failed: $AnyUncompressError\n"; and if you want to uncompress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; for my $input ( glob "/my/home/*.txt.Compressed" ) { my $output = $input; $output =~ s/.Compressed// ; anyuncompress $input => $output or die "Error compressing '$input': $AnyUncompressError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::AnyUncompress is shown below my $z = new IO::Uncompress::AnyUncompress $input [OPTS] or die "IO::Uncompress::AnyUncompress failed: $AnyUncompressError\n"; Returns an C<IO::Uncompress::AnyUncompress> object on success and undef on failure. The variable C<$AnyUncompressError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::AnyUncompress can be used exactly like an L<IO::File|IO::File> filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. 
=item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::AnyUncompress object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. 
This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::AnyUncompress will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C<read> method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C<read> method. If set to 0, the contents of the output parameter of the C<read> method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. =item C<< RawInflate => 0|1 >> When auto-detecting the compressed format, try to test for raw-deflate (RFC 1951) content using the C<IO::Uncompress::RawInflate> module. The reason this is not default behaviour is because RFC 1951 content can only be detected by attempting to uncompress it. This process is error prone and can result in false positives. Defaults to 0. =item C<< UnLzma => 0|1 >> When auto-detecting the compressed format, try to test for lzma_alone content using the C<IO::Uncompress::UnLzma> module. The reason this is not default behaviour is because lzma_alone content can only be detected by attempting to uncompress it. This process is error prone and can result in false positives. Defaults to 0. 
=back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C<BlockSize> option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C<Append> parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C<read> method and the previous one, is that this one will attempt to return I<exactly> C<$length> bytes. The only circumstances in which this function will not do so are when end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list of hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. 
=head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C<seek> in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C<seek>. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>. B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C<EXPR> is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. 
=head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C will return the underlying file descriptor. Once the C method is called C will return C. If the C<$z> object is associated with a buffer, this method will return C. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::AnyUncompress object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::AnyUncompress object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. 
If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::AnyUncompress at present. =over 5 =item :all Imports C and C<$AnyUncompressError>. Same as doing this use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; =back =head1 EXAMPLES =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
AnyInflate.pm000064400000066436147634506620007170 0ustar00package IO::Uncompress::AnyInflate ;
# NOTE(review): the text in front of `package` on the line above looks like a
# tar member header (file name, octal fields, "ustar" magic) fused onto the
# first source line -- it is kept verbatim here; confirm against the archive.

# for RFC1950, RFC1951 or RFC1952

use strict;
use warnings;
use bytes;

use IO::Compress::Base::Common 2.061 ();

use IO::Uncompress::Adapter::Inflate 2.061 ();

use IO::Uncompress::Base 2.061 ;
use IO::Uncompress::Gunzip 2.061 ;
use IO::Uncompress::Inflate 2.061 ;
use IO::Uncompress::RawInflate 2.061 ;
use IO::Uncompress::Unzip 2.061 ;

require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyInflateError);

$VERSION = '2.061';
# Package-level error string; a reference to it is handed to every object
# created by new() and anyinflate() below.
$AnyInflateError = '';

@ISA = qw( Exporter IO::Uncompress::Base );
@EXPORT_OK = qw( $AnyInflateError anyinflate ) ;
# Start from the tag table published by IO::Uncompress::Base, then add this
# module's own exportable names to the ':all' tag.
%EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# TODO - allow the user to pick a set of the three formats to allow
#        or just assume want to auto-detect any of the three formats.

# Constructor (OO interface).
#
# Creates the object with createSelfTiedObject(), passing a reference to
# $AnyInflateError, then forwards the caller's arguments to _create().
# There is no explicit return: the value of the _create() call is what the
# caller receives.
# NOTE(review): _create() is inherited and not visible here; the POD says the
# constructor yields the object on success and undef on failure -- confirm.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$AnyInflateError);
    $obj->_create(undef, 0, @_);
}

# Functional ("one-shot") interface.
#
# Builds an object with an undef class argument, bound to $AnyInflateError,
# and returns whatever _inf() returns for the caller's arguments.
sub anyinflate
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$AnyInflateError);
    return $obj->_inf(@_) ;
}

# Returns the option specification this class adds on top of the common ones:
# a single 'rawinflate' entry of the form [Parse_boolean, 0].
# Parse_boolean is presumably brought in by the ':Parse' import tag below;
# the trailing 0 looks like the default value -- TODO confirm against
# IO::Compress::Base::Common.
sub getExtraParams
{
    use IO::Compress::Base::Common 2.061 qw(:Parse);
    return ( 'rawinflate' => [Parse_boolean, 0] ) ;
}

# Parameter hook: unconditionally switches on both checksum options in the
# parsed-options object $got, then reports success (always returns 1).
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # any always needs both crc32 and adler32
    $got->setValue('crc32' => 1);
    $got->setValue('adler32' => 1);

    return 1;
}

# Creates the inflate adapter, auto-detects the container format and reads
# its header.
#
# Returns:
#   the result of saveErrorString(undef, ...) if the adapter object could
#       not be created;
#   undef if a magic value was recognised but readHeader() returned false;
#   1     if a magic value was recognised and the header was read;
#   0     if none of the candidate formats recognised the input.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject();

    return $self->saveErrorString(undef, $errstr, $errno)
        if ! defined $obj;

    *$self->{Uncomp} = $obj;

    # Candidate classes, tried in this order by ckMagic() below.
    my @possible = qw( Inflate Gunzip Unzip );
    # NOTE(review): `1 ||` makes this condition always true, so 'RawInflate'
    # is always put first regardless of the 'rawinflate' option, which the
    # POD documents as defaulting to 0. Left unchanged; confirm whether this
    # is intentional before removing it.
    unshift @possible, 'RawInflate'
        if 1 || $got->getValue('rawinflate');

    my $magic = $self->ckMagic( @possible );

    if ($magic) {
        # `or` binds looser than `=`: the header info is stored first, and
        # undef is returned only when readHeader() gave a false value.
        *$self->{Info} = $self->readHeader($magic)
            or return undef ;

        return 1;
    }

    return 0 ;
}

# Tries each named format in turn until one recognises the input.
#
# @names are class suffixes; each is expanded to "IO::Uncompress::<name>".
# For every candidate the object is re-blessed into that class, so the
# argument-less $self->ckMagic() call inside the loop is resolved through the
# candidate class rather than through this package.
#
# On a match the magic value is returned and the object STAYS blessed into
# the matching class. On a miss, whatever is held in HeaderPending is handed
# to pushBack() and the slot is cleared before the next candidate is tried.
# If nothing matches, the object is blessed back into its original class and
# undef is returned.
sub ckMagic
{
    my $self = shift;
    my @names = @_ ;

    # Remember the class we started as so it can be restored on failure.
    my $keep = ref $self ;
    for my $class ( map { "IO::Uncompress::$_" } @names)
    {
        bless $self => $class;
        my $magic = $self->ckMagic();

        if ($magic)
        {
            #bless $self => $class;
            return $magic ;
        }

        $self->pushBack(*$self->{HeaderPending}) ;
        *$self->{HeaderPending} = '' ;
    }

    bless $self => $keep;
    return undef;
}

1 ;

__END__


=head1 NAME

IO::Uncompress::AnyInflate - Uncompress zlib-based (zip, gzip) file/buffer

=head1 SYNOPSIS

    use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ;

    my $status = anyinflate $input => $output [,OPTS]
        or die "anyinflate failed: $AnyInflateError\n";

    my $z = new IO::Uncompress::AnyInflate $input [OPTS]
        or die "anyinflate failed: $AnyInflateError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $AnyInflateError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
files/buffers that have been compressed in a number of formats that use the
zlib compression library.

The formats supported are =over 5 =item RFC 1950 =item RFC 1951 (optionally) =item gzip (RFC 1952) =item zip =back The module will auto-detect which, if any, of the supported compression formats is being used. =head1 Functional Interface A top-level function, C<anyinflate>, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L</"OO Interface"> section. use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; anyinflate $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "anyinflate failed: $AnyInflateError\n"; The functional interface needs Perl5.005 or better. =head2 anyinflate $input_filename_or_reference => $output_filename_or_reference [, OPTS] C<anyinflate> expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the C<$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. 
=item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C<anyinflate> will assume that it is an I<input fileglob string>. The input is the list of files that match the fileglob. See L<File::GlobMapper|File::GlobMapper> for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C<undef> will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C<anyinflate> will assume that it is an I<output fileglob string>. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is a fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L<File::GlobMapper|File::GlobMapper> for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C<undef> will be returned. 
=head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C<anyinflate>, C<OPTS>, are the same as those used with the OO interface defined in the L</"Constructor Options"> section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C<anyinflate> that are filehandles. If C<AutoClose> is specified, and the value is true, it will result in all input and/or output filehandles being closed once C<anyinflate> has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C<binmode> before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C<Append> is enabled, all uncompressed data will be appended to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C<Append> is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C<Append> is enabled, the filehandle will be positioned to the end of the file via a call to C<seek> before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C<Append> is specified, and set to true, it will I<append> all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. 
Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; my $input = "file1.txt.Compressed"; my $output = "file1.txt"; anyinflate $input => $output or die "anyinflate failed: $AnyInflateError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. 
use strict ; use warnings ; use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; use IO::File ; my $input = new IO::File "<file1.txt.Compressed" or die "Cannot open 'file1.txt.Compressed': $!\n" ; my $buffer ; anyinflate $input => \$buffer or die "anyinflate failed: $AnyInflateError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the uncompressed data in the same directory use strict ; use warnings ; use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; anyinflate '</my/home/*.txt.Compressed>' => '<*.txt>' or die "anyinflate failed: $AnyInflateError\n"; and if you want to uncompress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; for my $input ( glob "/my/home/*.txt.Compressed" ) { my $output = $input; $output =~ s/.Compressed// ; anyinflate $input => $output or die "Error compressing '$input': $AnyInflateError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::AnyInflate is shown below my $z = new IO::Uncompress::AnyInflate $input [OPTS] or die "IO::Uncompress::AnyInflate failed: $AnyInflateError\n"; Returns an C<IO::Uncompress::AnyInflate> object on success and undef on failure. The variable C<$AnyInflateError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::AnyInflate can be used exactly like an L<IO::File|IO::File> filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. 
The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::AnyInflate object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. 
=item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::AnyInflate will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. If the input is an RFC 1950 data stream, the following will be checked: =over 5 =item 1 The ADLER32 checksum field must be present. =item 2 The value of the ADLER32 field read must match the adler32 value of the uncompressed data actually contained in the file. =back If the input is a gzip (RFC 1952) data stream, the following will be checked: =over 5 =item 1 If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the header must match the crc16 value of the gzip header actually read. =item 2 If the gzip header contains a name field (FNAME) it consists solely of ISO 8859-1 characters. =item 3 If the gzip header contains a comment field (FCOMMENT) it consists solely of ISO 8859-1 characters plus line-feed. 
=item 4 If the gzip FEXTRA header field is present it must conform to the sub-field structure as defined in RFC 1952. =item 5 The CRC32 and ISIZE trailer fields must be present. =item 6 The value of the CRC32 field read must match the crc32 value of the uncompressed data actually contained in the gzip file. =item 7 The value of the ISIZE fields read must match the length of the uncompressed data actually read from the file. =back =item C<< RawInflate => 0|1 >> When auto-detecting the compressed format, try to test for raw-deflate (RFC 1951) content using the C<IO::Uncompress::RawInflate> module. The reason this is not default behaviour is because RFC 1951 content can only be detected by attempting to uncompress it. This process is error prone and can result in false positives. Defaults to 0. =item C<< ParseExtra => 0|1 >> If the gzip FEXTRA header field is present and this option is set, it will force the module to check that it conforms to the sub-field structure as defined in RFC 1952. If C<Strict> is on it will automatically enable this option. Defaults to 0. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C<BlockSize> option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C<Append> parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C<read> method and the previous one, is that this one will attempt to return I<exactly> C<$length> bytes. 
The only circumstances in which this function will not do so are when end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list of hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C<seek> in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C<seek>. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. 
=head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>. B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C<EXPR> is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C<fileno> will return the underlying file descriptor. Once the C<close> method is called C<fileno> will return C<undef>. If the C<$z> object is associated with a buffer, this method will return C<undef>. =head2 close $z->close() ; close $z ; Closes the input file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::AnyInflate object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C<close> method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C<close> explicitly and not rely on automatic closing. Returns true on success, otherwise 0.
If the C option has been enabled when the IO::Uncompress::AnyInflate object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::AnyInflate at present. =over 5 =item :all Imports C and C<$AnyInflateError>. Same as doing this use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. 
The primary site for the I compression library is F.

The primary site for gzip is F.

=head1 AUTHOR

This module was written by Paul Marquess, F.

=head1 MODIFICATION HISTORY

See the Changes file.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2005-2013 Paul Marquess. All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

Adapter/Bunzip2.pm000064400000003772147634506620010041 0ustar00
package IO::Uncompress::Adapter::Bunzip2;

use strict;
use warnings;
use bytes;

use IO::Compress::Base::Common 2.061 qw(:Status);

use Compress::Raw::Bzip2 2.061 ;

our ($VERSION, @ISA);
$VERSION = '2.061';

# Create the underlying Compress::Raw::Bunzip2 stream.
#
# Shared by mkUncompObject and reset so that a stream rebuilt by reset is
# configured with the same arguments as the one it replaces.
# Returns the ($stream, $status) pair produced by the constructor.
sub _newStream
{
    my ($small, $verbosity) = @_;

    return Compress::Raw::Bunzip2->new(1, 1, $small, $verbosity, 1);
}

# Build a bunzip2 adapter object.
#
# Parameters (positional; each defaults to 0 when false or missing):
#   $small, $verbosity - passed through to the Compress::Raw::Bunzip2
#                        constructor.
#
# Returns the blessed adapter on success, or the list
# (undef, $error_message, $status) when the stream cannot be created.
sub mkUncompObject
{
    my $small     = shift || 0;
    my $verbosity = shift || 0;

    my ($inflate, $status) = _newStream($small, $verbosity);

    return (undef, "Could not create Inflation object: $status", $status)
        if $status != BZ_OK ;

    return bless {'Inf'           => $inflate,
                  'CompSize'      => 0,
                  'UnCompSize'    => 0,
                  'Error'         => '',
                  'ConsumesInput' => 1,
                  # Remembered so that reset() can rebuild an identical stream.
                  'Small'         => $small,
                  'Verbosity'     => $verbosity,
                 } ;
}

# Uncompress data from $from into $to via bzinflate.
#
# Records the raw bzip2 status in $self->{ErrorNo}. Returns STATUS_OK,
# STATUS_ENDSTREAM or, after storing a message in $self->{Error},
# STATUS_ERROR. The fourth argument ($eof) is accepted but not used here.
sub uncompr
{
    my $self = shift ;
    my $from = shift ;
    my $to   = shift ;
    my $eof  = shift ;

    my $inf  = $self->{Inf};

    my $status = $inf->bzinflate($from, $to);
    $self->{ErrorNo} = $status;

    if ($status != BZ_OK && $status != BZ_STREAM_END )
    {
        $self->{Error} = "Inflation Error: $status";
        return STATUS_ERROR;
    }

    return STATUS_OK        if $status == BZ_OK ;
    return STATUS_ENDSTREAM if $status == BZ_STREAM_END ;
    return STATUS_ERROR ;
}

# Replace the bunzip2 stream with a fresh one.
#
# The new stream is created with the same arguments that mkUncompObject
# used (previously it was created with constructor defaults, silently
# dropping $small, $verbosity and the final constructor flag).
# Returns STATUS_OK, or STATUS_ERROR with $self->{Error} set.
sub reset
{
    my $self = shift ;

    my ($inf, $status) = _newStream($self->{Small}     || 0,
                                    $self->{Verbosity} || 0);
    $self->{ErrorNo} = ($status == BZ_OK) ? 0 : $status ;

    if ($status != BZ_OK)
    {
        $self->{Error} = "Cannot create Inflate object: $status";
        return STATUS_ERROR;
    }

    $self->{Inf} = $inf;

    return STATUS_OK ;
}

# Number of compressed bytes, as reported by the underlying stream.
sub compressedBytes
{
    my $self = shift ;
    $self->{Inf}->compressedBytes();
}

# Number of uncompressed bytes, as reported by the underlying stream.
sub uncompressedBytes
{
    my $self = shift ;
    $self->{Inf}->uncompressedBytes();
}

# Placeholder: the real call is commented out, so no checksum is computed.
sub crc32
{
    my $self = shift ;
    #$self->{Inf}->crc32();
}

# Placeholder: the real call is commented out, so no checksum is computed.
sub adler32
{
    my $self = shift ;
    #$self->{Inf}->adler32();
}

# Placeholder: the real call is commented out, so nothing is synchronised.
sub sync
{
    my $self = shift ;
    #( $self->{Inf}->inflateSync(@_) == BZ_OK)
    #        ? STATUS_OK
    #        : STATUS_ERROR ;
}

1;

__END__

Adapter/Identity.pm000064400000010747147634506620010301 0ustar00
package IO::Uncompress::Adapter::Identity;

use warnings;
use strict;
use bytes;

use IO::Compress::Base::Common 2.061 qw(:Status);
use IO::Compress::Zip::Constants ;

our ($VERSION);

$VERSION = '2.061';

use Compress::Raw::Zlib 2.061 ();

# Build an adapter for stored (uncompressed) data.
#
# Parameters (positional):
#   $streaming - true when the end of the data has to be found by looking
#                for the "\x50\x4b\x07\x08" signature in the input
#   $zip64     - selects the 24-byte (true) or 16-byte (false) trailer size
#   $adler32   - true to maintain a running adler32 as well
#
# A running crc32 is always maintained. Returns the blessed adapter.
sub mkUncompObject
{
    my $streaming = shift;
    my $zip64     = shift;

    my $crc32     = 1; #shift ;
    my $adler32   = shift;

    bless { 'CompSize'      => U64->new , # 0,
            'UnCompSize'    => 0,
            'wantCRC32'     => $crc32,
            'CRC32'         => Compress::Raw::Zlib::crc32(''),
            'wantADLER32'   => $adler32,
            'ADLER32'       => Compress::Raw::Zlib::adler32(''),
            'ConsumesInput' => 1,
            'Streaming'     => $streaming,
            'Zip64'         => $zip64,
            'DataHdrSize'   => $zip64 ? 24 : 16,
            'Pending'       => '',
          } ;
}

# Decide whether $descriptor (which starts with the 4-byte signature) really
# marks the end of the data, given the not-yet-counted bytes in $$data.
#
# True only when the CRC stored after the signature matches the running CRC
# extended with $$data, and both stored sizes equal the byte count so far.
sub _isDataDescriptor
{
    my ($self, $data, $descriptor) = @_;

    my $crc = unpack "V", substr($descriptor, 4);
    return 0
        if $crc != Compress::Raw::Zlib::crc32($$data, $self->{CRC32});

    my ($l1, $l2) ;

    if ($self->{Zip64}) {
        $l1 = U64::newUnpack_V64(substr($descriptor, 8));
        $l2 = U64::newUnpack_V64(substr($descriptor, 16));
    }
    else {
        $l1 = U64::newUnpack_V32(substr($descriptor, 8));
        $l2 = U64::newUnpack_V32(substr($descriptor, 12));
    }

    my $newLen = $self->{CompSize}->clone();
    $newLen->add(length $$data);

    return $l1->equal($l2) && $l1->equal($newLen) ;
}

# Copy data from the input buffer to the output buffer.
#
# Arguments: $_[0] is a reference to the input buffer, $_[1] a reference to
# the output buffer and $_[2] the end-of-input flag. The copied bytes are
# appended to the output, and the input buffer is replaced by whatever was
# held back. In streaming mode a signature (or a possible partial signature
# at the very end of the input) is held back until it can be classified.
#
# Returns STATUS_ENDSTREAM at the end of the data, otherwise STATUS_OK.
sub uncompr
{
    my $self = shift;
    my $in   = $_[0];
    my $eof  = $_[2];

    my $len = length $$in;
    my $remainder = '';

    if (defined $$in && $len) {

        if ($self->{Streaming}) {

            # Re-attach anything held back by the previous call.
            if (length $self->{Pending}) {
                $$in = $self->{Pending} . $$in ;
                $len = length $$in;
                $self->{Pending} = '';
            }

            my $ind = index($$in, "\x50\x4b\x07\x08");

            if ($ind < 0) {
                # No full signature: check for one cut short by the end of
                # this chunk.
                $len = length $$in;
                if ($len >= 3 && substr($$in, -3) eq "\x50\x4b\x07") {
                    $ind = $len - 3 ;
                }
                elsif ($len >= 2 && substr($$in, -2) eq "\x50\x4b") {
                    $ind = $len - 2 ;
                }
                elsif ($len >= 1 && substr($$in, -1) eq "\x50") {
                    $ind = $len - 1 ;
                }
            }

            if ($ind >= 0) {
                $remainder = substr($$in, $ind) ;
                substr($$in, $ind) = '' ;
            }
        }

        if (length $remainder && length $remainder < $self->{DataHdrSize}) {
            # Too short to classify yet; wait for more input.
            $self->{Pending} = $remainder ;
            $remainder = '';
        }
        elsif (length $remainder >= $self->{DataHdrSize}) {
            if (_isDataDescriptor($self, $in, $remainder)) {
                $eof = 1;
            }
            else {
                # The signature bytes were ordinary data: pass them on and
                # keep scanning from just after them.
                $$in .= substr($remainder, 0, 4) ;
                $remainder = substr($remainder, 4);
                $eof = 0;
            }
        }

        if (length $$in) {
            $self->{CompSize}->add(length $$in) ;

            $self->{CRC32} = Compress::Raw::Zlib::crc32($$in, $self->{CRC32})
                if $self->{wantCRC32};

            # Compress::Raw::Zlib is the only zlib module this package loads.
            $self->{ADLER32} = Compress::Raw::Zlib::adler32($$in, $self->{ADLER32})
                if $self->{wantADLER32};
        }

        ${ $_[1] } .= $$in;
        $$in = $remainder;
    }

    return STATUS_ENDSTREAM if $eof;
    return STATUS_OK ;
}

# Reset the byte counters and running checksums. Always returns STATUS_OK.
sub reset
{
    my $self = shift;

    # CompSize must stay a U64 object: uncompr() calls clone() and add() on it.
    $self->{CompSize}   = U64->new;
    $self->{UnCompSize} = 0;
    $self->{CRC32}      = Compress::Raw::Zlib::crc32('');
    $self->{ADLER32}    = Compress::Raw::Zlib::adler32('');

    return STATUS_OK ;
}

#sub count
#{
#    my $self = shift ;
#    return $self->{UnCompSize} ;
#}
# Byte count accumulated by uncompr().
sub compressedBytes
{
    my $self = shift ;
    return $self->{CompSize} ;
}

# Returns the same counter as compressedBytes().
sub uncompressedBytes
{
    my $self = shift ;
    return $self->{CompSize} ;
}

# Nothing to synchronise; always returns STATUS_OK.
sub sync
{
    return STATUS_OK ;
}

# Running crc32 of the data passed through so far.
sub crc32
{
    my $self = shift ;
    return $self->{CRC32};
}

# Running adler32 of the data passed through so far.
sub adler32
{
    my $self = shift ;
    return $self->{ADLER32};
}

1;

__END__

Adapter/Inflate.pm000064400000006320147634506620010062 0ustar00
package IO::Uncompress::Adapter::Inflate;

use strict;
use warnings;
#use bytes;

use IO::Compress::Base::Common 2.061 qw(:Status);
use Compress::Raw::Zlib 2.061 qw(Z_OK Z_BUF_ERROR Z_STREAM_END Z_FINISH MAX_WBITS);

our ($VERSION);
$VERSION = '2.061';

# Build an inflate adapter object.
#
# Parameters (positional):
#   $crc32, $adler32 - a false or missing value is replaced by 1
#   $scan            - true to create a Compress::Raw::Zlib::InflateScan
#                      object instead of a Compress::Raw::Zlib::Inflate one
#
# Both kinds of object are created with WindowBits => -MAX_WBITS.
#
# Returns the blessed adapter on success, or the list
# (undef, $error_message, $status) when the object cannot be created.
sub mkUncompObject
{
    my $crc32    = shift || 1;
    my $adler32  = shift || 1;
    my $scan     = shift || 0;

    my $inflate ;
    my $status ;

    if ($scan)
    {
        ($inflate, $status) = Compress::Raw::Zlib::InflateScan->new(
                                    #LimitOutput  => 1,
                                    CRC32        => $crc32,
                                    ADLER32      => $adler32,
                                    WindowBits   => - MAX_WBITS );
    }
    else
    {
        ($inflate, $status) = Compress::Raw::Zlib::Inflate->new(
                                    AppendOutput => 1,
                                    LimitOutput  => 1,
                                    CRC32        => $crc32,
                                    ADLER32      => $adler32,
                                    WindowBits   => - MAX_WBITS );
    }

    return (undef, "Could not create Inflation object: $status", $status)
        if $status != Z_OK ;

    return bless {'Inf'           => $inflate,
                  'CompSize'      => 0,
                  'UnCompSize'    => 0,
                  'Error'         => '',
                  'ConsumesInput' => 1,
                 } ;
}

# Inflate data from $from into $to.
#
# Records the raw zlib status in $self->{ErrorNo}. Z_OK and Z_BUF_ERROR
# map to STATUS_OK, Z_STREAM_END to STATUS_ENDSTREAM; any other status
# stores a message in $self->{Error} and returns STATUS_ERROR.
sub uncompr
{
    my $self = shift ;
    my $from = shift ;
    my $to   = shift ;
    my $eof  = shift ;

    my $inf  = $self->{Inf};

    my $status = $inf->inflate($from, $to, $eof);
    $self->{ErrorNo} = $status;

    if ($status != Z_OK && $status != Z_STREAM_END && $status != Z_BUF_ERROR)
    {
        $self->{Error} = "Inflation Error: $status";
        return STATUS_ERROR;
    }

    return STATUS_OK        if $status == Z_BUF_ERROR ; # ???
    return STATUS_OK        if $status == Z_OK ;
    return STATUS_ENDSTREAM if $status == Z_STREAM_END ;
    return STATUS_ERROR ;
}

# Reset the underlying inflate object. Always returns STATUS_OK.
sub reset
{
    my $self = shift ;
    $self->{Inf}->inflateReset();

    return STATUS_OK ;
}

#sub count
#{
#    my $self = shift ;
#    $self->{Inf}->inflateCount();
#}

# The accessors below delegate to the underlying inflate object.

sub crc32
{
    my $self = shift ;
    $self->{Inf}->crc32();
}

sub compressedBytes
{
    my $self = shift ;
    $self->{Inf}->compressedBytes();
}

sub uncompressedBytes
{
    my $self = shift ;
    $self->{Inf}->uncompressedBytes();
}

sub adler32
{
    my $self = shift ;
    $self->{Inf}->adler32();
}

# Returns STATUS_OK when inflateSync reports Z_OK, otherwise STATUS_ERROR.
sub sync
{
    my $self = shift ;
    ( $self->{Inf}->inflateSync(@_) == Z_OK)
            ? STATUS_OK
            : STATUS_ERROR ;
}

sub getLastBlockOffset
{
    my $self = shift ;
    $self->{Inf}->getLastBlockOffset();
}

sub getEndOffset
{
    my $self = shift ;
    $self->{Inf}->getEndOffset();
}

sub resetLastBlockByte
{
    my $self = shift ;
    $self->{Inf}->resetLastBlockByte(@_);
}

# Wrap the deflate stream created by the underlying object in an
# IO::Compress::Adapter::Deflate adapter.
sub createDeflateStream
{
    my $self = shift ;
    my $deflate = $self->{Inf}->createDeflateStream(@_);
    return bless {'Def'        => $deflate,
                  'CompSize'   => 0,
                  'UnCompSize' => 0,
                  'Error'      => '',
                 }, 'IO::Compress::Adapter::Deflate';
}

1;

__END__

RawInflate.pm000064400000075572147634506620007173 0ustar00
package IO::Uncompress::RawInflate ;
# for RFC1951

use strict ;
use warnings;
#use bytes;

use Compress::Raw::Zlib 2.061 ;
use IO::Compress::Base::Common 2.061 qw(:Status );

use IO::Uncompress::Base 2.061 ;
use IO::Uncompress::Adapter::Inflate 2.061 ;

require Exporter ;
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, %DEFLATE_CONSTANTS, $RawInflateError);

$VERSION = '2.061';
$RawInflateError = '';

@ISA       = qw( Exporter IO::Uncompress::Base );
@EXPORT_OK = qw( $RawInflateError rawinflate ) ;
%DEFLATE_CONSTANTS = ();
%EXPORT_TAGS = %IO::Uncompress::Base::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

#{
#    # Execute at runtime
#    my %bad;
#    for my $module (qw(Compress::Raw::Zlib IO::Compress::Base::Common IO::Uncompress::Base IO::Uncompress::Adapter::Inflate))
#    {
#        my $ver = ${ $module . "::VERSION"} ;
#
#        $bad{$module} = $ver
#            if $ver ne $VERSION;
#    }
#
#    if (keys %bad)
#    {
#        my $string = join "\n", map { "$_ $bad{$_}" } keys %bad;
#        die caller(0)[0] . "needs version $VERSION mismatch\n$string\n";
#    }
#}

# Constructor for the OO interface; errors are reported via $RawInflateError.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$RawInflateError);
    $obj->_create(undef, 0, @_);
}

# Functional ("one-shot") interface; errors are reported via $RawInflateError.
sub rawinflate
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$RawInflateError);
    return $obj->_inf(@_);
}

# This class adds no constructor parameters of its own.
sub getExtraParams
{
    return ();
}

# No parameter validation is done here; always returns 1.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    return 1;
}

# Create the inflate adapter from the 'crc32', 'adler32' and 'scan'
# parameters, then check the start of the input and read the header.
#
# Returns 1 on success, 0 when ckMagic fails, and undef when the adapter
# cannot be created or readHeader fails.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject(
                                                                $got->getValue('crc32'),
                                                                $got->getValue('adler32'),
                                                                $got->getValue('scan'),
                                                            );

    return $self->saveErrorString(undef, $errstr, $errno)
        if ! defined $obj;

    *$self->{Uncomp} = $obj;

    my $magic = $self->ckMagic()
        or return 0;

    *$self->{Info} = $self->readHeader($magic)
        or return undef ;

    return 1;
}

# A raw deflate stream has no magic number; delegate to _isRaw.
sub ckMagic
{
    my $self = shift;

    return $self->_isRaw() ;
}

# Returns the fixed header-information hash used for rfc1951 data.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    return {
        'Type'              => 'rfc1951',
        'FingerprintLength' => 0,
        'HeaderLength'      => 0,
        'TrailerLength'     => 0,
        'Header'            => ''
        };
}

# There is no trailer to check; always returns STATUS_OK.
sub chkTrailer
{
    return STATUS_OK ;
}

# Run _isRawx and tidy up afterwards.
#
# On success the data left in HeaderPending becomes the Pending buffer;
# on failure it is pushed back onto the input and the adapter is reset.
# Returns whatever _isRawx returned.
sub _isRaw
{
    my $self = shift ;

    my $got = $self->_isRawx(@_);

    if ($got) {
        *$self->{Pending} = *$self->{HeaderPending} ;
    }
    else {
        $self->pushBack(*$self->{HeaderPending});
        *$self->{Uncomp}->reset();
    }
    *$self->{HeaderPending} = '';

    return $got ;
}

# Read one block of input and try to uncompress it.
#
# $magic (optional) is data that has already been read; it is prepended to
# the block. While the attempt is in progress HeaderPending holds the raw
# input, so that _isRaw can push it back on failure; on success it is
# replaced by the uncompressed output.
#
# Returns a header-information hash on success, or undef (via
# saveErrorString) on failure.
sub _isRawx
{
    my $self   = shift ;
    my $magic = shift ;

    $magic = '' unless defined $magic ;

    my $buffer = '';

    $self->smartRead(\$buffer, *$self->{BlockSize}) >= 0
        or return $self->saveErrorString(undef, "No data to read");

    my $temp_buf = $magic . $buffer ;
    *$self->{HeaderPending} = $temp_buf ;
    $buffer = '';
    my $status = *$self->{Uncomp}->uncompr(\$temp_buf, \$buffer, $self->smartEof()) ;

    return $self->saveErrorString(undef, *$self->{Uncomp}{Error}, STATUS_ERROR)
        if $status == STATUS_ERROR;

    $self->pushBack($temp_buf) ;

    return $self->saveErrorString(undef, "unexpected end of file", STATUS_ERROR)
        if $self->smartEof() && $status != STATUS_ENDSTREAM;

    #my $buf_len = *$self->{Uncomp}->uncompressedBytes();
    my $buf_len = length $buffer;

    if ($status == STATUS_ENDSTREAM) {
        if (*$self->{MultiStream}
                    && (length $temp_buf || ! $self->smartEof())){
            *$self->{NewStream} = 1 ;
            *$self->{EndStream} = 0 ;
        }
        else {
            *$self->{EndStream} = 1 ;
        }
    }
    *$self->{HeaderPending} = $buffer ;
    *$self->{InflatedBytesRead} = $buf_len ;
    *$self->{TotalInflatedBytesRead} += $buf_len ;
    *$self->{Type} = 'rfc1951';

    $self->saveStatus(STATUS_OK);

    return {
        'Type'          => 'rfc1951',
        'HeaderLength'  => 0,
        'TrailerLength' => 0,
        'Header'        => ''
        };
}

# Feed pending or freshly read data to the adapter's sync method until it
# reports STATUS_OK.
#
# Returns 1 on success (and in Plain mode), 0 otherwise. Note that the
# Strict flag is switched off as a side effect.
sub inflateSync
{
    my $self = shift ;

    # inflateSync is a no-op in Plain mode
    return 1
        if *$self->{Plain} ;

    return 0 if *$self->{Closed} ;
    #return G_EOF if !length *$self->{Pending} && *$self->{EndStream} ;
    return 0 if ! length *$self->{Pending} && *$self->{EndStream} ;

    # Disable CRC check
    *$self->{Strict} = 0 ;

    my $status ;
    while (1)
    {
        my $temp_buf ;

        if (length *$self->{Pending} )
        {
            $temp_buf = *$self->{Pending} ;
            *$self->{Pending} = '';
        }
        else
        {
            $status = $self->smartRead(\$temp_buf, *$self->{BlockSize}) ;
            return $self->saveErrorString(0, "Error Reading Data")
                if $status < 0 ;

            if ($status == 0 ) {
                *$self->{EndStream} = 1 ;
                return $self->saveErrorString(0, "unexpected end of file", STATUS_ERROR);
            }
        }

        $status = *$self->{Uncomp}->sync($temp_buf) ;

        if ($status == STATUS_OK)
        {
            *$self->{Pending} .= $temp_buf ;
            return 1 ;
        }

        last unless $status == STATUS_ERROR ;
    }

    return 0;
}

#sub performScan
#{
#    my $self = shift ;
#
#    my $status ;
#    my $end_offset = 0;
#
#    $status = $self->scan()
#    #or return $self->saveErrorString(undef, "Error Scanning: $$error_ref", $self->errorNo) ;
#        or return $self->saveErrorString(G_ERR, "Error Scanning: $status")
#
#    $status = $self->zap($end_offset)
#        or return $self->saveErrorString(G_ERR, "Error Zapping: $status");
#    #or return $self->saveErrorString(undef, "Error Zapping: $$error_ref", $self->errorNo) ;
#
#    #(*$obj->{Deflate}, $status) = $inf->createDeflate();
#
##    *$obj->{Header} = *$inf->{Info}{Header};
##    *$obj->{UnCompSize_32bit} =
##        *$obj->{BytesWritten} = *$inf->{UnCompSize_32bit} ;
##    *$obj->{CompSize_32bit} = *$inf->{CompSize_32bit} ;
#
#
##    if ( $outType eq 'buffer')
##      { substr( ${ *$self->{Buffer} }, $end_offset) = '' }
##    elsif ($outType eq 'handle' || $outType eq 'filename') {
##        *$self->{FH} = *$inf->{FH} ;
##        delete *$inf->{FH};
##        *$obj->{FH}->flush() ;
##        *$obj->{Handle} = 1 if $outType eq 'handle';
##
##        #seek(*$obj->{FH}, $end_offset, SEEK_SET)
##        *$obj->{FH}->seek($end_offset, SEEK_SET)
##            or return $obj->saveErrorString(undef, $!, $!) ;
##    }
#
#}

# Read through the input until the end of the stream is flagged or a read
# fails. Returns 0 if the last read returned a negative length, else 1.
sub scan
{
    my $self = shift ;

    return 1 if *$self->{Closed} ;
    return 1 if !length *$self->{Pending} && *$self->{EndStream} ;

    my $buffer = '' ;
    my $len = 0;

    $len = $self->_raw_read(\$buffer, 1)
        while ! *$self->{EndStream} && $len >= 0 ;

    #return $len if $len < 0 ? $len : 0 ;
    return $len < 0 ? 0 : 1 ;
}

# Read the byte at the last-block offset, let the adapter's
# resetLastBlockByte modify it, and write it back in place.
#
# The caller's first argument ($_[0]) is overwritten with the end offset
# (header length plus the adapter's end offset).
# Returns 1 on success, or 0 (via saveErrorString) if a seek, read or
# write fails.
sub zap
{
    my $self = shift ;

    my $headerLength = *$self->{Info}{HeaderLength};
    my $block_offset = $headerLength + *$self->{Uncomp}->getLastBlockOffset();
    $_[0] = $headerLength + *$self->{Uncomp}->getEndOffset();
    #printf "# End $_[0], headerlen $headerLength \n";;
    #printf "# block_offset $block_offset %x\n", $block_offset;
    my $byte ;
    ( $self->smartSeek($block_offset) &&
      $self->smartRead(\$byte, 1) )
        or return $self->saveErrorString(0, $!, $!);

    #printf "#byte is %x\n", unpack('C*',$byte);
    *$self->{Uncomp}->resetLastBlockByte($byte);
    #printf "#to byte is %x\n", unpack('C*',$byte);

    ( $self->smartSeek($block_offset) &&
      $self->smartWrite($byte) )
        or return $self->saveErrorString(0, $!, $!);

    #$self->smartSeek($end_offset, 1);

    return 1 ;
}

# Ask the adapter for a deflate stream, passing on this object's crc32 and
# adler32 parameter values.
#
# In list context returns ($status, $def); in scalar context returns $def.
sub createDeflate
{
    my $self = shift ;
    my ($def, $status) = *$self->{Uncomp}->createDeflateStream(
                                    -AppendOutput => 1,
                                    -WindowBits   => - MAX_WBITS,
                                    -CRC32        => *$self->{Params}->getValue('crc32'),
                                    -ADLER32      => *$self->{Params}->getValue('adler32'),
                                );

    return wantarray ? ($status, $def) : $def ;
}

1;

__END__

=head1 NAME

IO::Uncompress::RawInflate - Read RFC 1951 files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ;

    my $status = rawinflate $input => $output [,OPTS]
        or die "rawinflate failed: $RawInflateError\n";

    my $z = new IO::Uncompress::RawInflate $input [OPTS]
        or die "rawinflate failed: $RawInflateError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $RawInflateError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
files/buffers that conform to RFC 1951.

For writing RFC 1951 files/buffers, see the companion module IO::Compress::RawDeflate.

=head1 Functional Interface

A top-level function, C<rawinflate>, is provided to carry out
"one-shot" uncompression between buffers and/or files. For finer
control over the uncompression process, see the L</"OO Interface">
section.

    use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ;

    rawinflate $input_filename_or_reference => $output_filename_or_reference [,OPTS]
        or die "rawinflate failed: $RawInflateError\n";

The functional interface needs Perl5.005 or better.

=head2 rawinflate $input_filename_or_reference => $output_filename_or_reference [, OPTS]

C<rawinflate> expects at least two parameters,
C<$input_filename_or_reference> and C<$output_filename_or_reference>.

=head3 The C<$input_filename_or_reference> parameter

The parameter, C<$input_filename_or_reference>, is used to define the
source of the compressed data.
It can take one of the following forms: =over 5 =item A filename If the C<$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C<rawinflate> will assume that it is an I<input fileglob string>. The input is the list of files that match the fileglob. See L<File::GlobMapper> for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C<undef> will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>.
=item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C<rawinflate> will assume that it is an I<output fileglob string>. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is a fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L<File::GlobMapper> for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C<undef> will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C<rawinflate>, C<OPTS>, are the same as those used with the OO interface defined in the L</"Constructor Options"> section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C<rawinflate> that are filehandles. If C<AutoClose> is specified, and the value is true, it will result in all input and/or output filehandles being closed once C<rawinflate> has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C<binmode> before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C<Append> is enabled, all uncompressed data will be appended to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C<Append> is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it.
=item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> This option is a no-op. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. 
use strict ; use warnings ; use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; my $input = "file1.txt.1951"; my $output = "file1.txt"; rawinflate $input => $output or die "rawinflate failed: $RawInflateError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; use IO::File ; my $input = new IO::File "<file1.txt.1951" or die "Cannot open 'file1.txt.1951': $!\n" ; my $buffer ; rawinflate $input => \$buffer or die "rawinflate failed: $RawInflateError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.1951" and store the uncompressed data in the same directory use strict ; use warnings ; use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; rawinflate '</my/home/*.txt.1951>' => '</my/home/#1.txt>' or die "rawinflate failed: $RawInflateError\n"; and if you want to uncompress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; for my $input ( glob "/my/home/*.txt.1951" ) { my $output = $input; $output =~ s/.1951// ; rawinflate $input => $output or die "Error uncompressing '$input': $RawInflateError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::RawInflate is shown below my $z = new IO::Uncompress::RawInflate $input [OPTS] or die "IO::Uncompress::RawInflate failed: $RawInflateError\n"; Returns an C<IO::Uncompress::RawInflate> object on success and undef on failure. The variable C<$RawInflateError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::RawInflate can be used exactly like an L<IO::File|IO::File> filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data.
This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C<close> method is called or the IO::Uncompress::RawInflate object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I<primed> with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway.
In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::RawInflate will read it in blocks of C<$num> bytes. This option defaults to 16384 (16 * 1024). =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C<read> method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C<read> method. If set to 0, the contents of the output parameter of the C<read> method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option is a no-op. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C<BlockSize> option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C<Append> parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error.
=head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C<read> method and the previous one, is that this one will attempt to return I<exactly> C<$length> bytes. The only circumstances in which this function will not do so are when end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list of hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C<seek> in this module does not provide true random access to a compressed file/buffer.
It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C<seek>. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>. B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C<EXPR> is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C<fileno> will return the underlying file descriptor. Once the C<close> method is called C<fileno> will return C<undef>. If the C<$z> object is associated with a buffer, this method will return C<undef>. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::RawInflate object is destroyed (either explicitly or by the variable with the reference to the object going out of scope).
The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C<close> method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C<close> explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C<AutoClose> option has been enabled when the IO::Uncompress::RawInflate object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C<trailingData> will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C<trailingData> will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C<trailingData> if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C<trailingData> by setting the C<InputLength> option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::RawInflate at present.
=over 5 =item :all Imports C and C<$RawInflateError>. Same as doing this use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
Base.pm000064400000112301147634506620005767 0ustar00

package IO::Uncompress::Base ;

use strict ;
use warnings;
#use bytes;

our (@ISA, $VERSION, @EXPORT_OK, %EXPORT_TAGS);
@ISA = qw(Exporter IO::File);

$VERSION = '2.061';

use constant G_EOF => 0 ;
use constant G_ERR => -1 ;

use IO::Compress::Base::Common 2.061 ;

use IO::File ;
use Symbol;
use Scalar::Util ();
use List::Util ();
use Carp ;

%EXPORT_TAGS = ( );
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;

# smartRead($out, $size)
#
# Fill $$out with up to $size bytes of raw (still compressed) input.
# Bytes held in Prime are handed out first; the rest comes from the
# filehandle (FH), the InputEvent callback, or the in-memory Buffer.
# When InputLength is defined the request is clamped to
# InputLengthRemaining, which is decremented by the bytes delivered.
#
# Returns the number of bytes placed in $$out (0 once the InputLength
# budget is used up), or STATUS_ERROR when the filehandle read returns
# undef (errno is recorded via saveStatus).
sub smartRead
{
    my ($self, $out, $size) = @_ ;

    $$out = "" ;

    if (defined *$self->{InputLength}) {
        return 0
            if *$self->{InputLengthRemaining} <= 0 ;
        $size = List::Util::min($size, *$self->{InputLengthRemaining});
    }

    # Number of bytes of $$out already satisfied from Prime.
    my $primed = 0 ;

    if ( length *$self->{Prime} ) {
        # Take the leading $size bytes off Prime in one step.
        $$out = substr(*$self->{Prime}, 0, $size, '') ;

        my $have = length $$out ;
        if ($have == $size) {
            *$self->{InputLengthRemaining} -= $have
                if defined *$self->{InputLength};
            return $have ;
        }

        $primed = $have ;
    }

    my $want   = $size - $primed ;
    my $status = 1 ;

    if (defined *$self->{FH}) {
        if ($primed) {
            # Not using this
            #
            #  *$self->{FH}->read($$out, $want, $primed);
            #
            # because the filehandle may not support the offset parameter
            # An example is Net::FTP
            my $chunk = '';
            $status = *$self->{FH}->read($chunk, $want) ;
            substr($$out, $primed) = $chunk
                if defined $status && $status > 0 ;
        }
        else {
            $status = *$self->{FH}->read($$out, $want) ;
        }
    }
    elsif (defined *$self->{InputEvent}) {
        my $got = 1 ;
        while (length($$out) < $size) {
            $got = *$self->{InputEvent}->($$out, $want) ;
            last if $got <= 0 ;
        }

        # Anything the callback supplied beyond $size is kept for later.
        if (length($$out) > $size) {
            *$self->{Prime} = substr($$out, $size, length($$out), '') ;
        }

        *$self->{EventEof} = 1
            if $got <= 0 ;
    }
    else {
        no warnings 'uninitialized';

        my $buf = *$self->{Buffer} ;
        $$buf = ''
            unless defined $$buf ;

        substr($$out, $primed) = substr($$buf, *$self->{BufferOffset}, $want);

        if (*$self->{ConsumeInput}) {
            substr($$buf, 0, $want) = '' ;
        }
        else {
            *$self->{BufferOffset} += length($$out) - $primed ;
        }
    }

    *$self->{InputLengthRemaining} -= length($$out)
        if defined *$self->{InputLength};

    unless (defined $status) {
        $self->saveStatus($!) ;
        return STATUS_ERROR;
    }

    # length() is never negative, so this always records STATUS_OK.
    $self->saveStatus(length($$out) < 0 ? STATUS_ERROR : STATUS_OK) ;

    return length $$out;
}

# pushBack($data): return unread bytes to the input so the next
# smartRead sees them again.
sub pushBack
{
    my $self = shift ;

    return if ! defined $_[0] || length $_[0] == 0 ;

    if (defined *$self->{FH} || defined *$self->{InputEvent} ) {
        *$self->{Prime} = $_[0] . *$self->{Prime} ;
        *$self->{InputLengthRemaining} += length($_[0]);
    }
    else {
        my $len = length $_[0];

        if($len > *$self->{BufferOffset}) {
            *$self->{Prime} = substr($_[0], 0, $len - *$self->{BufferOffset}) .
*$self->{Prime} ;
            *$self->{InputLengthRemaining} = *$self->{InputLength};
            *$self->{BufferOffset} = 0
        }
        else {
            *$self->{InputLengthRemaining} += length($_[0]);
            *$self->{BufferOffset} -= length($_[0]) ;
        }
    }
}

# smartSeek($offset, $truncate, $whence)
#
# Reposition the input.  With a filehandle the seek is delegated to it and
# its result returned.  Otherwise BufferOffset is moved within the
# in-memory buffer ($whence defaults to SEEK_SET), the buffer is cut at the
# new offset when $truncate is true, and 1 is returned.
sub smartSeek
{
    my $self = shift ;
    my $offset = shift ;
    my $truncate = shift;
    my $position = shift || SEEK_SET;

    # TODO -- need to take prime into account
    if (defined *$self->{FH}) {
        return *$self->{FH}->seek($offset, $position) ;
    }

    if ($position == SEEK_END) {
        # The parentheses matter: "length $x + $offset" parses as
        # length($x + $offset) because named unary operators bind
        # more loosely than "+".
        *$self->{BufferOffset} = length(${ *$self->{Buffer} }) + $offset ;
    }
    elsif ($position == SEEK_CUR) {
        *$self->{BufferOffset} += $offset ;
    }
    else {
        *$self->{BufferOffset} = $offset ;
    }

    substr(${ *$self->{Buffer} }, *$self->{BufferOffset}) = ''
        if $truncate;

    return 1;
}

# Current raw input position: the filehandle's tell(), or BufferOffset.
sub smartTell
{
    my $self = shift ;

    return defined *$self->{FH}
        ? *$self->{FH}->tell()
        : *$self->{BufferOffset} ;
}

# smartWrite($data): write $data at the current position of the
# filehandle or in-memory buffer.  Returns true on success.
sub smartWrite
{
    my $self = shift ;
    my $out_data = shift ;

    if (defined *$self->{FH}) {
        # flush needed for 5.8.0
        return defined *$self->{FH}->write($out_data, length $out_data)
            && defined *$self->{FH}->flush() ;
    }

    my $buf = *$self->{Buffer} ;
    substr($$buf, *$self->{BufferOffset}, length $out_data) = $out_data ;
    *$self->{BufferOffset} += length($out_data) ;

    return 1;
}

# smartReadExact($out, $size): true only when smartRead delivered
# exactly $size bytes.
sub smartReadExact
{
    return $_[0]->smartRead($_[1], $_[2]) == $_[2];
}

# True when no more raw input is available.  Never true while Prime holds
# data or PushMode is set.
sub smartEof
{
    my ($self) = $_[0];
    local $.;

    return 0 if length *$self->{Prime} || *$self->{PushMode};

    if (defined *$self->{FH})
    {
        # Could use
        #
        #  *$self->{FH}->eof()
        #
        # here, but this can cause trouble if
        # the filehandle is itself a tied handle, but it uses sysread.
        # Then we get into mixing buffered & non-buffered IO,
        # which will cause trouble

        # Probe by reading one byte and pushing it back; the saved error
        # state is restored so the probe leaves no trace.
        my $info = $self->getErrInfo();

        my $buffer = '';
        my $status = $self->smartRead(\$buffer, 1);
        $self->pushBack($buffer) if length $buffer;
        $self->setErrInfo($info);

        return $status == 0 ;
    }
    elsif (defined *$self->{InputEvent}) {
        return *$self->{EventEof} ;
    }
    else {
        return *$self->{BufferOffset} >= length(${ *$self->{Buffer} }) ;
    }
}

# Reset the stored error number and error string.
sub clearError
{
    my $self = shift ;

    *$self->{ErrorNo} = 0 ;
    ${ *$self->{Error} } = '' ;
}

# Snapshot the error state as [ ErrorNo, error string ].
sub getErrInfo
{
    my $self = shift ;

    return [ *$self->{ErrorNo}, ${ *$self->{Error} } ] ;
}

# Restore an error state previously captured by getErrInfo.
sub setErrInfo
{
    my $self = shift ;
    my $ref = shift;

    *$self->{ErrorNo} = $ref->[0] ;
    ${ *$self->{Error} } = $ref->[1] ;
}

# saveStatus($errno): store the numeric status, clear the error string,
# and return the stored number.
sub saveStatus
{
    my $self = shift ;
    my $errno = shift() + 0 ;

    *$self->{ErrorNo} = $errno;
    ${ *$self->{Error} } = '' ;

    return *$self->{ErrorNo} ;
}

# saveErrorString($retval, $message [, $errno])
#
# Store $message and the numeric $errno (STATUS_ERROR when omitted), then
# return $retval so callers can write "return $self->saveErrorString(...)".
sub saveErrorString
{
    my $self = shift ;
    my $retval = shift ;

    ${ *$self->{Error} } = shift ;
    *$self->{ErrorNo} = @_ ? shift() + 0 : STATUS_ERROR ;

    return $retval;
}

# Record the message as the current error, then croak with it.
sub croakError
{
    my $self = shift ;

    $self->saveErrorString(0, $_[0]);
    croak $_[0];
}

# closeError($retval): close the object while keeping the current error
# state intact across the close, then return $retval.
sub closeError
{
    my $self = shift ;
    my $retval = shift ;

    my $errno = *$self->{ErrorNo};
    my $error = ${ *$self->{Error} };

    $self->close();

    *$self->{ErrorNo} = $errno ;
    ${ *$self->{Error} } = $error ;

    return $retval;
}

# Current error string.
sub error
{
    my $self = shift ;
    return ${ *$self->{Error} } ;
}

# Current numeric error status.
sub errorNo
{
    my $self = shift ;
    return *$self->{ErrorNo};
}

# Record a "Header Error: ..." message; returns undef.
sub HeaderError
{
    my ($self) = shift;
    return $self->saveErrorString(undef, "Header Error: $_[0]", STATUS_ERROR);
}

# Record a "Trailer Error: ..." message; returns G_ERR.
sub TrailerError
{
    my ($self) = shift;
    return $self->saveErrorString(G_ERR, "Trailer Error: $_[0]", STATUS_ERROR);
}

# Header error for input that ends inside the named section.
sub TruncatedHeader
{
    my ($self) = shift;
    return $self->HeaderError("Truncated in $_[0] Section");
}

# Trailer error for input that ends inside the named section.
sub TruncatedTrailer
{
    my ($self) = shift;
    return $self->TrailerError("Truncated in $_[0] Section");
}

# Hook called by checkParams after parsing; this base version accepts
# everything.
sub postCheckParams
{
    return 1;
}

# checkParams($class, $got, @options): parse the caller's options against
# the table of valid parameters.
sub checkParams
{
    my $self = shift ;
    my $class = shift ;

    my $got = shift ||
IO::Compress::Base::Parameters::new();

    my $Valid = {
                    'blocksize'     => [IO::Compress::Base::Common::Parse_unsigned, 16 * 1024],
                    'autoclose'     => [IO::Compress::Base::Common::Parse_boolean,  0],
                    'strict'        => [IO::Compress::Base::Common::Parse_boolean,  0],
                    'append'        => [IO::Compress::Base::Common::Parse_boolean,  0],
                    'prime'         => [IO::Compress::Base::Common::Parse_any,      undef],
                    'multistream'   => [IO::Compress::Base::Common::Parse_boolean,  0],
                    'transparent'   => [IO::Compress::Base::Common::Parse_any,      1],
                    'scan'          => [IO::Compress::Base::Common::Parse_boolean,  0],
                    'inputlength'   => [IO::Compress::Base::Common::Parse_unsigned, undef],
                    'binmodeout'    => [IO::Compress::Base::Common::Parse_boolean,  0],
                   #'decode'        => [IO::Compress::Base::Common::Parse_any,      undef],
                   #'consumeinput'  => [IO::Compress::Base::Common::Parse_boolean,  0],

                    $self->getExtraParams(),

                    #'Todo - Revert to ordinary file on end Z_STREAM_END'=> 0,
                    # ContinueAfterEof
                } ;

    # TrailingData is only accepted by the one-shot functional interface.
    $Valid->{trailingdata} = [IO::Compress::Base::Common::Parse_writable_scalar, undef]
        if *$self->{OneShot} ;

    $got->parse($Valid, @_ )
        or $self->croakError("${class}: " . $got->getError()) ;

    $self->postCheckParams($got)
        or $self->croakError("${class}: " . $self->error()) ;

    return $got;
}

# _create($got, $append_mode, $input, @options)
#
# Common constructor body.  $got is an already-parsed parameter object or
# a false value, in which case @options are parsed here.  $input may be a
# filename, a filehandle, a scalar reference or a code reference.
#
# Sets up the input source and per-object state, asks the subclass
# (mkUncomp) to recognise the stream, and performs one read so that a
# stream that uncompresses to nothing is at eof straight away.
#
# Returns the object on success and undef on failure; croaks when the
# input parameter is missing or the subclass rejects the parameters.
sub _create
{
    my $obj = shift;
    my $got = shift;
    my $append_mode = shift ;

    my $class = ref $obj;
    $obj->croakError("$class: Missing Input parameter")
        if ! @_ && ! $got ;

    my $inValue = shift ;

    *$obj->{OneShot} = 0 ;

    if (! $got)
    {
        $got = $obj->checkParams($class, undef, @_)
            or return undef ;
    }

    my $inType = whatIsInput($inValue, 1);

    $obj->ckInputParam($class, $inValue, 1)
        or return undef ;

    *$obj->{InNew} = 1;

    # Error holds a reference to the error string, so report the string
    # itself via error() rather than concatenating the reference.
    $obj->ckParams($got)
        or $obj->croakError("${class}: " . $obj->error());

    if ($inType eq 'buffer' || $inType eq 'code') {
        *$obj->{Buffer} = $inValue ;
        *$obj->{InputEvent} = $inValue
            if $inType eq 'code' ;
    }
    else {
        if ($inType eq 'handle') {
            *$obj->{FH} = $inValue ;
            *$obj->{Handle} = 1 ;

            # Need to rewind for Scan
            *$obj->{FH}->seek(0, SEEK_SET)
                if $got->getValue('scan');
        }
        else {
            no warnings ;
            my $mode = '<';
            $mode = '+<' if $got->getValue('scan');
            *$obj->{StdIO} = ($inValue eq '-');
            *$obj->{FH} = new IO::File "$mode $inValue"
                or return $obj->saveErrorString(undef, "cannot open file '$inValue': $!", $!) ;
        }

        *$obj->{LineNo} = $. = 0;
        setBinModeInput(*$obj->{FH}) ;

        my $buff = "" ;
        *$obj->{Buffer} = \$buff ;
    }

#    if ($got->getValue('decode')) {
#        my $want_encoding = $got->getValue('decode');
#        *$obj->{Encoding} = IO::Compress::Base::Common::getEncoding($obj, $class, $want_encoding);
#    }
#    else {
#        *$obj->{Encoding} = undef;
#    }

    *$obj->{InputLength}       = $got->parsed('inputlength')
                                    ? $got->getValue('inputlength')
                                    : undef ;
    *$obj->{InputLengthRemaining} = $got->getValue('inputlength');
    *$obj->{BufferOffset}      = 0 ;
    *$obj->{AutoClose}         = $got->getValue('autoclose');
    *$obj->{Strict}            = $got->getValue('strict');
    *$obj->{BlockSize}         = $got->getValue('blocksize');
    *$obj->{Append}            = $got->getValue('append');
    *$obj->{AppendOutput}      = $append_mode || $got->getValue('append');
    *$obj->{ConsumeInput}      = $got->getValue('consumeinput');
    *$obj->{Transparent}       = $got->getValue('transparent');
    *$obj->{MultiStream}       = $got->getValue('multistream');

    # TODO - move these two into RawDeflate
    *$obj->{Scan}              = $got->getValue('scan');
    *$obj->{ParseExtra}        = $got->getValue('parseextra')
                                  || $got->getValue('strict')  ;
    *$obj->{Type}              = '';
    *$obj->{Prime}             = $got->getValue('prime') || '' ;
    *$obj->{Pending}           = '';
    *$obj->{Plain}             = 0;
    *$obj->{PlainBytesRead}    = 0;
    *$obj->{InflatedBytesRead} = 0;
    *$obj->{UnCompSize}        = new U64;
    *$obj->{CompSize}          = new U64;
    *$obj->{TotalInflatedBytesRead} = 0;
    *$obj->{NewStream}         = 0 ;
    *$obj->{EventEof}          = 0 ;
    *$obj->{ClassName}         = $class ;
    *$obj->{Params}            = $got ;

    if (*$obj->{ConsumeInput}) {
        *$obj->{InNew} = 0;
        *$obj->{Closed} = 0;
        return $obj
    }

    my $status = $obj->mkUncomp($got);

    return undef
        unless defined $status;

    *$obj->{InNew} = 0;
    *$obj->{Closed} = 0;

    if ($status) {
        # Need to try uncompressing to catch the case
        # where the compressed file uncompresses to an
        # empty string - so eof is set immediately.

        my $out_buffer = '';

        $status = $obj->read(\$out_buffer);

        # A failure here is remembered and reported by the first
        # user-level read().
        if ($status < 0) {
            *$obj->{ReadStatus} = [ $status, $obj->error(), $obj->errorNo() ];
        }

        $obj->ungetc($out_buffer)
            if length $out_buffer;
    }
    else {
        # Not a recognised stream: fall back to pass-through when
        # Transparent is set, handing the header bytes back to the input.
        return undef
            unless *$obj->{Transparent};

        $obj->clearError();
        *$obj->{Type} = 'plain';
        *$obj->{Plain} = 1;
        $obj->pushBack(*$obj->{HeaderPending}) ;
    }

    push @{ *$obj->{InfoList} }, *$obj->{Info} ;

    $obj->saveStatus(STATUS_OK) ;
    *$obj->{InNew} = 0;
    *$obj->{Closed} = 0;

    return $obj;
}

# ckInputParam($from, $input, $flag): croak unless whatIsInput recognises
# the input parameter.
sub ckInputParam
{
    my $self = shift ;
    my $from = shift ;
    my $inType = whatIsInput($_[0], $_[1]);

    $self->croakError("$from: input parameter not a filename, filehandle, array ref or scalar ref")
        if ! $inType ;

#    if ($inType eq 'filename' )
#    {
#        return $self->saveErrorString(1, "$from: input filename is undef or null string", STATUS_ERROR)
#            if ! defined $_[0] || $_[0] eq '' ;
#
#        if ($_[0] ne '-' && !
#            -e $_[0] )
#        {
#            return $self->saveErrorString(1,
#                            "input file '$_[0]' does not exist", STATUS_ERROR);
#        }
#    }

    return 1;
}

# _inf($input, $output, @options)
#
# Engine behind the one-shot functional interface.  Validates the
# input/output pair, parses the options and dispatches to _singleTarget
# once per target.
#
# Returns the number of file pairs for a fileglob mapping, 1 when each
# input was written to the output in turn, otherwise the result of the
# single _singleTarget call; undef on failure.
#
# The class and function names used in messages come from caller(), so
# this must be called directly from the public wrapper.
sub _inf
{
    my $obj = shift ;

    my $class = (caller)[0] ;
    my $name = (caller(1))[3] ;

    $obj->croakError("$name: expected at least 1 parameters\n")
        unless @_ >= 1 ;

    my $input = shift ;
    my $haveOut = @_ ;
    my $output = shift ;

    my $x = new IO::Compress::Base::Validator($class, *$obj->{Error}, $name,
                                              $input, $output)
        or return undef ;

    push @_, $output if $haveOut && $x->{Hash};

    *$obj->{OneShot} = 1 ;

    my $got = $obj->checkParams($name, undef, @_)
        or return undef ;

    if ($got->parsed('trailingdata'))
    {
        *$obj->{TrailingData} = $got->getValue('trailingdata');
    }

    # MultiStream is handled by the loop in _rd2, so it is switched off
    # for the per-input objects created from $got.
    *$obj->{MultiStream} = $got->getValue('multistream');
    $got->setValue('multistream', 0);

    $x->{Got} = $got ;

    if ($x->{GlobMap})
    {
        $x->{oneInput} = 1 ;
        foreach my $pair (@{ $x->{Pairs} })
        {
            my ($from, $to) = @$pair ;
            $obj->_singleTarget($x, $from, $to, @_)
                or return undef ;
        }

        return scalar @{ $x->{Pairs} } ;
    }

    if (! $x->{oneOutput} )
    {
        my $inFile = ($x->{inType} eq 'filenames'
                        || $x->{inType} eq 'filename');

        $x->{inType} = $inFile ? 'filename' : 'buffer';

        foreach my $in ($x->{oneInput} ? $input : @$input)
        {
            $x->{oneInput} = 1 ;

            $obj->_singleTarget($x, $in, $output, @_)
                or return undef ;
        }

        return 1 ;
    }

    # finally the 1 to 1 and n to 1
    return $obj->_singleTarget($x, $input, $output, @_);
}

# retErr($x, $message): store $message through the validator's error
# reference and return undef.
sub retErr
{
    my $x = shift ;
    my $string = shift ;

    ${ $x->{Error} } = $string ;

    return undef ;
}

# _singleTarget($x, $input, $output, @options)
#
# Prepare one output target (file, filehandle or buffer), uncompress the
# input(s) into it via _rd2, then close the output when it was opened
# here (a filename other than '-') or when AutoClose is set for a handle.
# Returns 1 on success, undef on failure.
sub _singleTarget
{
    my $self = shift ;
    my $x = shift ;
    my $input = shift;
    my $output = shift;

    my $buff = '';
    $x->{buff} = \$buff ;

    if ($x->{outType} eq 'filename') {
        my $mode = '>' ;
        $mode = '>>'
            if $x->{Got}->getValue('append') ;
        $x->{fh} = new IO::File "$mode $output"
            or return retErr($x, "cannot open file '$output': $!") ;
        binmode $x->{fh} if $x->{Got}->valueOrDefault('binmodeout');
    }
    elsif ($x->{outType} eq 'handle') {
        $x->{fh} = $output;
        binmode $x->{fh} if $x->{Got}->valueOrDefault('binmodeout');
        if ($x->{Got}->getValue('append')) {
            seek($x->{fh}, 0, SEEK_END)
                or return retErr($x, "Cannot seek to end of output filehandle: $!") ;
        }
    }
    elsif ($x->{outType} eq 'buffer' )
    {
        $$output = ''
            unless $x->{Got}->getValue('append');
        $x->{buff} = $output ;
    }

    if ($x->{oneInput})
    {
        defined $self->_rd2($x, $input, $output)
            or return undef;
    }
    else
    {
        for my $element ( ($x->{inType} eq 'hash') ? keys %$input : @$input)
        {
            defined $self->_rd2($x, $element, $output)
                or return undef ;
        }
    }

    if ( ($x->{outType} eq 'filename' && $output ne '-') ||
         ($x->{outType} eq 'handle' && $x->{Got}->getValue('autoclose'))) {
        $x->{fh}->close()
            or return retErr($x, $!);
        delete $x->{fh};
    }

    return 1 ;
}

# _rd2($x, $input, $output)
#
# Uncompress one input completely.  Data goes to $x->{fh} when an output
# handle is set, otherwise it accumulates in $x->{buff}.  With MultiStream
# enabled, following streams in the same input are processed as well.
# Returns 1 on success, undef on failure.
sub _rd2
{
    my $self = shift ;
    my $x = shift ;
    my $input = shift;
    my $output = shift;

    my $z = IO::Compress::Base::Common::createSelfTiedObject($x->{Class}, *$self->{Error});

    $z->_create($x->{Got}, 1, $input, @_)
        or return undef ;

    my $status ;
    my $fh = $x->{fh};

    while (1) {

        while (($status = $z->read($x->{buff})) > 0) {
            if ($fh) {
                syswrite $fh, ${ $x->{buff} }
                    or return $z->saveErrorString(undef, "Error writing to output file: $!", $!);
                ${ $x->{buff} } = '' ;
            }
        }

        # One result per input: store this buffer and start a new one.
        if (! $x->{oneOutput} ) {
            my $ot = $x->{outType} ;

            if ($ot eq 'array')
              { push @$output, $x->{buff} }
            elsif ($ot eq 'hash')
              { $output->{$input} = $x->{buff} }

            my $buff = '';
            $x->{buff} = \$buff;
        }

        last if $status < 0 || $z->smartEof();

        last
            unless *$self->{MultiStream};

        $status = $z->nextStream();

        last
            unless $status == 1 ;
    }

    return $z->closeError(undef)
        if $status < 0 ;

    ${ *$self->{TrailingData} } = $z->trailingData()
        if defined *$self->{TrailingData} ;

    $z->close()
        or return undef ;

    return 1 ;
}

# The object is its own tie target; tying anything else is a bug.
sub TIEHANDLE
{
    return $_[0] if ref($_[0]);
    die "OOPS\n" ;
}

# Nothing to release on untie.
sub UNTIE
{
    my $self = shift ;
}

# List context: every entry of InfoList.  Scalar context: the current Info.
sub getHeaderInfo
{
    my $self = shift ;
    wantarray ? @{ *$self->{InfoList} } : *$self->{Info};
}

# readBlock($buff, $size): read the next block of raw input into $$buff,
# limited by CompressedInputLength when that is defined.
sub readBlock
{
    my $self = shift ;
    my $buff = shift ;
    my $size = shift ;

    if (defined *$self->{CompressedInputLength}) {
        if (*$self->{CompressedInputLengthRemaining} == 0) {
            delete *$self->{CompressedInputLength};
            *$self->{CompressedInputLengthDone} = 1;
            return STATUS_OK ;
        }
        $size = List::Util::min($size, *$self->{CompressedInputLengthRemaining} );
        *$self->{CompressedInputLengthRemaining} -= $size ;
    }

    my $status = $self->smartRead($buff, $size) ;
    return $self->saveErrorString(STATUS_ERROR, "Error Reading Data: $!", $!)
if $status == STATUS_ERROR ;

    # Running out of input in the middle of a stream is an error here.
    if ($status == 0 ) {
        *$self->{Closed} = 1 ;
        *$self->{EndStream} = 1 ;
        return $self->saveErrorString(STATUS_ERROR, "unexpected end of file", STATUS_ERROR);
    }

    return STATUS_OK;
}

# Hook called from _raw_read after each uncompressed block; this base
# version accepts everything.
sub postBlockChk
{
    return STATUS_OK;
}

# _raw_read($buffer, $scan_mode)
#
# Read one block of input and append whatever it uncompresses to onto
# $$buffer.  Handles three cases: pass-through of plain data, switching to
# the next stream, and the normal read/uncompress path including trailer
# checking at end of stream.
sub _raw_read
{
    # return codes
    # >0 - ok, number of bytes read
    # =0 - ok, eof
    # <0 - not ok

    my $self = shift ;

    return G_EOF if *$self->{Closed} ;
    return G_EOF if *$self->{EndStream} ;

    my $buffer = shift ;
    my $scan_mode = shift ;

    # Plain (not compressed) input: copy it straight through.
    if (*$self->{Plain}) {
        my $tmp_buff ;
        my $len = $self->smartRead(\$tmp_buff, *$self->{BlockSize}) ;

        return $self->saveErrorString(G_ERR, "Error reading data: $!", $!)
                if $len == STATUS_ERROR ;

        if ($len == 0 ) {
            *$self->{EndStream} = 1 ;
        }
        else {
            *$self->{PlainBytesRead} += $len ;
            $$buffer .= $tmp_buff;
        }

        return $len ;
    }

    # A previous call reached the end of one stream with more input left.
    if (*$self->{NewStream}) {

        $self->gotoNextStream() > 0
            or return G_ERR;

        # For the headers that actually uncompressed data, put the
        # uncompressed data into the output buffer.
        $$buffer .= *$self->{Pending} ;
        my $len = length *$self->{Pending} ;
        *$self->{Pending} = '';
        return $len;
    }

    my $temp_buf = '';
    my $outSize = 0;
    my $status = $self->readBlock(\$temp_buf, *$self->{BlockSize}, $outSize) ;

    return G_ERR
        if $status == STATUS_ERROR ;

    my $buf_len = 0;
    if ($status == STATUS_OK) {
        my $beforeC_len = length $temp_buf;
        my $before_len = defined $$buffer ? length $$buffer : 0 ;
        # The third argument tells the uncompressor whether this is the
        # last input it will get.
        $status = *$self->{Uncomp}->uncompr(\$temp_buf, $buffer,
                                    defined *$self->{CompressedInputLengthDone} ||
                                                $self->smartEof(), $outSize);

        # Remember the input buffer if it wasn't consumed completely
        $self->pushBack($temp_buf) if *$self->{Uncomp}{ConsumesInput};

        return $self->saveErrorString(G_ERR, *$self->{Uncomp}{Error}, *$self->{Uncomp}{ErrorNo})
            if $self->saveStatus($status) == STATUS_ERROR;

        $self->postBlockChk($buffer, $before_len) == STATUS_OK
            or return G_ERR;

        # Bytes appended to $$buffer by this call.
        $buf_len = defined $$buffer ? length($$buffer) - $before_len : 0;

        *$self->{CompSize}->add($beforeC_len - length $temp_buf) ;

        *$self->{InflatedBytesRead} += $buf_len ;
        *$self->{TotalInflatedBytesRead} += $buf_len ;
        *$self->{UnCompSize}->add($buf_len) ;

        $self->filterUncompressed($buffer, $before_len);

#        if (*$self->{Encoding}) {
#            use Encode ;
#            *$self->{PendingDecode} .= substr($$buffer, $before_len) ;
#            my $got = *$self->{Encoding}->decode(*$self->{PendingDecode}, Encode::FB_QUIET) ;
#            substr($$buffer, $before_len) = $got;
#        }
    }

    if ($status == STATUS_ENDSTREAM) {

        *$self->{EndStream} = 1 ;

        my $trailer;
        my $trailer_size = *$self->{Info}{TrailerLength} ;
        my $got = 0;
        if (*$self->{Info}{TrailerLength})
        {
            $got = $self->smartRead(\$trailer, $trailer_size) ;
        }

        if ($got == $trailer_size) {
            $self->chkTrailer($trailer) == STATUS_OK
                or return G_ERR;
        }
        else {
            # A short trailer is an error only in Strict mode; otherwise
            # the bytes are handed back to the input.
            return $self->TrailerError("trailer truncated. Expected " .
                                      "$trailer_size bytes, got $got")
                if *$self->{Strict};
            $self->pushBack($trailer) ;
        }

        # TODO - if want to file file pointer, do it here

        # More input follows: flag it so the next call (or nextStream)
        # moves on to the following stream.
        if (! $self->smartEof()) {
            *$self->{NewStream} = 1 ;

            if (*$self->{MultiStream}) {
                *$self->{EndStream} = 0 ;
                return $buf_len ;
            }
        }
    }

    # return the number of uncompressed bytes read
    return $buf_len ;
}

# Reset the uncompression engine; returns whatever its reset() returns.
sub reset
{
    my $self = shift ;

    return *$self->{Uncomp}->reset();
}

# Hook called from _raw_read with the output buffer and the offset at
# which the newly uncompressed data starts; this base version does nothing.
sub filterUncompressed
{
}

#sub isEndStream
#{
#    my $self = shift ;
#    return *$self->{NewStream} ||
#           *$self->{EndStream} ;
#}

# Move to the next stream in the input.  On success the per-stream byte
# count and the line number are reset and 1 is returned; otherwise the
# status from gotoNextStream is passed back unchanged.
sub nextStream
{
    my $self = shift ;

    my $status = $self->gotoNextStream();
    $status == 1
        or return $status ;

    *$self->{TotalInflatedBytesRead} = 0 ;
    *$self->{LineNo} = $. = 0;

    return 1;
}

# Position the object at the start of the next stream, first reading the
# rest of the current one when its end has not been reached yet.
sub gotoNextStream
{
    my $self = shift ;

    if (!
*$self->{NewStream}) {
        my $status = 1;
        my $buffer ;

        # TODO - make this more efficient if know the offset for the end of
        # the stream and seekable
        $status = $self->read($buffer)
            while $status > 0 ;

        return $status
            if $status < 0;
    }

    *$self->{NewStream} = 0 ;
    *$self->{EndStream} = 0 ;
    *$self->{CompressedInputLengthDone} = undef ;
    *$self->{CompressedInputLength} = undef ;
    $self->reset();
    *$self->{UnCompSize}->reset();
    *$self->{CompSize}->reset();

    my $magic = $self->ckMagic();

    if ( ! defined $magic) {
        if (! *$self->{Transparent} || $self->eof())
        {
            *$self->{EndStream} = 1 ;
            return 0;
        }

        $self->clearError();
        *$self->{Type} = 'plain';
        *$self->{Plain} = 1;
        $self->pushBack(*$self->{HeaderPending})  ;
    }
    else
    {
        *$self->{Info} = $self->readHeader($magic);

        if ( ! defined *$self->{Info} ) {
            *$self->{EndStream} = 1 ;
            return -1;
        }
    }

    push @{ *$self->{InfoList} }, *$self->{Info} ;

    return 1;
}

# Number of header-info records collected in InfoList; reports 1 when
# InfoList has not been created yet.
sub streamCount
{
    my $self = shift ;
    return 1 if ! defined *$self->{InfoList};
    return scalar @{ *$self->{InfoList} }  ;
}

#sub read
#{
#    my $status = myRead(@_);
#    return undef if $status < 0;
#    return $status;
#}

# read(BUFFER [, LENGTH [, OFFSET]])
#
# BUFFER may be a plain scalar or a reference to a scalar. Without LENGTH
# and OFFSET whatever is pending (or the next non-empty chunk from
# _raw_read) is appended to BUFFER. Otherwise up to LENGTH bytes are
# placed in BUFFER at OFFSET, and any surplus is kept in Pending for the
# next call. Croaks on a read-only buffer, a reference that is not a
# SCALAR reference, or a negative LENGTH.
sub read
{
    # return codes
    # >0 - ok, number of bytes read
    # =0 - ok, eof
    # <0 - not ok

    my $self = shift ;

    # A status saved earlier in ReadStatus is reported once, then cleared.
    if (defined *$self->{ReadStatus} ) {
        my $status = *$self->{ReadStatus}[0];
        $self->saveErrorString( @{ *$self->{ReadStatus} } );
        delete  *$self->{ReadStatus} ;
        return $status ;
    }

    return G_EOF if *$self->{Closed} ;

    my $buffer ;

    if (ref $_[0] ) {
        # Check the reference type BEFORE dereferencing it: ${ $_[0] } on
        # a non-scalar reference dies inside perl under "strict refs", so
        # the read-only test must come second for this croak to be seen.
        $self->croakError(*$self->{ClassName} . "::read: not a scalar reference $_[0]" )
            unless ref $_[0] eq 'SCALAR' ;

        $self->croakError(*$self->{ClassName} . "::read: buffer parameter is read-only")
            if Scalar::Util::readonly(${ $_[0] });

        $buffer = $_[0] ;
    }
    else {
        $self->croakError(*$self->{ClassName} . "::read: buffer parameter is read-only")
            if Scalar::Util::readonly($_[0]);

        # Alias the caller's variable so the result lands in it.
        $buffer = \$_[0] ;
    }

    my $length = $_[1] ;
    my $offset = $_[2] || 0;

    # Prepare the output buffer: unless appending, empty it (no offset),
    # pad it with NUL bytes up to the offset, or truncate it at the offset.
    if (! *$self->{AppendOutput}) {
        if (! $offset) {
            $$buffer = '' ;
        }
        else {
            if ($offset > length($$buffer)) {
                $$buffer .= "\x00" x ($offset - length($$buffer));
            }
            else {
                substr($$buffer, $offset) = '';
            }
        }
    }
    elsif (! defined $$buffer) {
        $$buffer = '' ;
    }

    return G_EOF if !length *$self->{Pending} && *$self->{EndStream} ;

    # the core read will return 0 if asked for 0 bytes
    return 0 if defined $length && $length == 0 ;

    $length = $length || 0;

    $self->croakError(*$self->{ClassName} . "::read: length parameter is negative")
        if $length < 0 ;

    # Short-circuit if this is a simple read, with no length
    # or offset specified.
    unless ( $length || $offset) {
        if (length *$self->{Pending}) {
            $$buffer .= *$self->{Pending} ;
            my $len = length *$self->{Pending};
            *$self->{Pending} = '' ;
            return $len ;
        }
        else {
            # Keep reading until some data arrives or the stream ends.
            my $len = 0;
            $len = $self->_raw_read($buffer)
                while ! *$self->{EndStream} && $len == 0 ;
            return $len ;
        }
    }

    # Need to jump through more hoops - either length or offset
    # or both are specified.
    my $out_buffer = *$self->{Pending} ;
    *$self->{Pending} = '';

    while (! *$self->{EndStream} && length($out_buffer) < $length)
    {
        my $buf_len = $self->_raw_read(\$out_buffer);
        return $buf_len
            if $buf_len < 0 ;
    }

    # Clamp the request to what is actually available.
    $length = length $out_buffer
        if length($out_buffer) < $length ;

    return 0
        if $length == 0 ;

    $$buffer = ''
        if ! defined $$buffer;

    $offset = length $$buffer
        if *$self->{AppendOutput} ;

    # Park everything in Pending, hand $length bytes to the caller and
    # leave the remainder in Pending.
    *$self->{Pending} = $out_buffer;
    $out_buffer = \*$self->{Pending} ;

    substr($$buffer, $offset) = substr($$out_buffer, 0, $length) ;
    substr($$out_buffer, 0, $length) =  '' ;

    return $length ;
}

sub _getline
{
    my $self = shift ;
    my $status = 0 ;

    # Slurp Mode
    if ( ! defined $/ ) {
        my $data ;
        1 while ($status = $self->read($data)) > 0 ;
        return ($status, \$data);
    }

    # Record Mode
    if ( ref $/ eq 'SCALAR' && ${$/} =~ /^\d+$/ && ${$/} > 0) {
        my $reclen = ${$/} ;
        my $data ;
        $status = $self->read($data, $reclen) ;
        return ($status, \$data);
    }

    # Paragraph Mode
    if ( !
length $/ ) {
        my $paragraph ;
        while (($status = $self->read($paragraph)) > 0 ) {
            if ($paragraph =~ s/^(.*?\n\n+)//s) {
                *$self->{Pending}  = $paragraph ;
                my $par = $1 ;
                return (1, \$par);
            }
        }
        return ($status, \$paragraph);
    }

    # $/ isn't empty, or a reference, so it's Line Mode.
    {
        my $line ;
        my $p = \*$self->{Pending}  ;
        while (($status = $self->read($line)) > 0 ) {
            my $offset = index($line, $/);
            if ($offset >= 0) {
                my $l = substr($line, 0, $offset + length $/ );
                substr($line, 0, $offset + length $/) = '';
                $$p = $line;
                return (1, \$l);
            }
        }

        return ($status, \$line);
    }
}

# Return the next record as determined by $/ (via _getline), or undef at
# end of input, on error, or when the object is closed. Each record
# returned increments LineNo and sets $. to match.
sub getline
{
    my $self = shift;

    # A status saved earlier in ReadStatus is reported once, then cleared.
    if (defined *$self->{ReadStatus} ) {
        $self->saveErrorString( @{ *$self->{ReadStatus} } );
        delete  *$self->{ReadStatus} ;
        return undef;
    }

    return undef
        if *$self->{Closed} || (!length *$self->{Pending} && *$self->{EndStream}) ;

    # _getline calls read() repeatedly on one buffer, so AppendOutput is
    # forced on for the duration of the call and restored afterwards.
    my $current_append = *$self->{AppendOutput} ;
    *$self->{AppendOutput} = 1;

    my ($status, $lineref) = $self->_getline();
    *$self->{AppendOutput} = $current_append;

    return undef
        if $status < 0 || length $$lineref == 0 ;

    $. = ++ *$self->{LineNo} ;

    return $$lineref ;
}

# Return all remaining records as a list. Croaks when not called in list
# context.
sub getlines
{
    my $self = shift;
    $self->croakError(*$self->{ClassName} .
            "::getlines: called in scalar context\n") unless wantarray;
    my($line, @lines);
    push(@lines, $line)
        while defined($line = $self->getline);
    return @lines;
}

# Tied-handle READLINE: dispatch on calling context.
sub READLINE
{
    goto &getlines if wantarray;
    goto &getline;
}

# Return the next byte of output, or undef at end of input or on error.
sub getc
{
    my $self = shift;
    my $buf;

    # read() reports errors as a negative number, which is a true value;
    # only a positive count means a byte was actually delivered.
    return $buf if $self->read($buf, 1) > 0;
    return undef;
}

# Push the given string back so that it is returned before any other
# pending data.
sub ungetc
{
    my $self = shift;
    *$self->{Pending} = ""  unless defined *$self->{Pending} ;
    *$self->{Pending} = $_[0] . *$self->{Pending} ;
}

# Return the input that follows the data consumed so far: the Prime slot
# when reading from a filehandle or input event, otherwise the rest of
# the input buffer from BufferOffset onwards.
sub trailingData
{
    my $self = shift ;

    if (defined *$self->{FH} || defined *$self->{InputEvent} ) {
        return *$self->{Prime} ;
    }
    else {
        my $buf = *$self->{Buffer} ;
        my $offset = *$self->{BufferOffset} ;
        return substr($$buf, $offset) ;
    }
}

# True when the object is closed, or when nothing is pending and either
# the input is exhausted or the end of the stream has been reached.
sub eof
{
    my $self = shift ;

    return (*$self->{Closed} ||
              (!length *$self->{Pending}
                && ( $self->smartEof() || *$self->{EndStream}))) ;
}

# Current offset in the output: bytes produced so far minus the bytes
# still waiting in Pending (never less than 0).
sub tell
{
    my $self = shift ;

    my $in ;
    if (*$self->{Plain}) {
        $in = *$self->{PlainBytesRead} ;
    }
    else {
        $in = *$self->{TotalInflatedBytesRead} ;
    }

    my $pending = length *$self->{Pending} ;

    # Pending can exceed the count, e.g. after ungetc.
    return 0 if $pending > $in ;
    return $in - $pending ;
}

# Mark the object closed. The underlying filehandle is closed only when
# this object opened it itself (no Handle) or AutoClose is set, and it is
# not flagged as StdIO.
sub close
{
    # todo - what to do if close is called before the end of the gzip file
    #        do we remember any trailing data?
    my $self = shift ;

    return 1 if *$self->{Closed} ;

    untie *$self
        if $] >= 5.008 ;

    my $status = 1 ;

    if (defined *$self->{FH}) {
        if ((! *$self->{Handle} || *$self->{AutoClose}) && ! *$self->{StdIO}) {
            # Closing a handle resets $. so protect the caller's value.
            local $.;
            $! = 0 ;
            $status = *$self->{FH}->close();
            return $self->saveErrorString(0, $!, $!)
                if !*$self->{InNew} && $self->saveStatus($!) != 0 ;
        }
        delete *$self->{FH} ;
        $! = 0 ;
    }
    *$self->{Closed} = 1 ;

    return 1;
}

# Close on destruction without disturbing the caller's status variables.
sub DESTROY
{
    my $self = shift ;
    local ($., $@, $!, $^E, $?);

    $self->close() ;
}

sub seek
{
    my $self     = shift ;
    my $position = shift;
    my $whence   = shift ;

    my $here = $self->tell() ;
    my $target = 0 ;

    if ($whence == SEEK_SET) {
        $target = $position ;
    }
    elsif ($whence == SEEK_CUR) {
        $target = $here + $position ;
    }
    elsif ($whence == SEEK_END) {
        $target = $position ;
        $self->croakError(*$self->{ClassName} .
"::seek: SEEK_END not allowed") ;
    }
    else {
        $self->croakError(*$self->{ClassName} ."::seek: unknown value, $whence, for whence parameter");
    }

    # short circuit if seeking to current offset
    if ($target == $here) {
        # On ordinary filehandles, seeking to the current
        # position also clears the EOF condition, so we
        # emulate this behavior locally while simultaneously
        # cascading it to the underlying filehandle
        if (*$self->{Plain}) {
            *$self->{EndStream} = 0;
            seek(*$self->{FH},0,1) if *$self->{FH};
        }
        return 1;
    }

    # Outlaw any attempt to seek backwards
    $self->croakError( *$self->{ClassName} ."::seek: cannot seek backwards")
        if $target < $here ;

    # Walk the file to the new offset
    my $offset = $target - $here ;

    my $got;
    while (($got = $self->read(my $buffer, List::Util::min($offset, *$self->{BlockSize})) ) > 0)
    {
        $offset -= $got;
        last if $offset == 0 ;
    }

    $here = $self->tell() ;
    return $offset == 0 ? 1 : 0 ;
}

# File descriptor of the underlying filehandle, or undef when there is
# no filehandle.
sub fileno
{
    my $self = shift ;
    return defined *$self->{FH}
           ? fileno *$self->{FH}
           : undef ;
}

# No-op that always returns true.
sub binmode
{
    1;
#    my $self     = shift ;
#    return defined *$self->{FH}
#            ? binmode *$self->{FH}
#            : 1 ;
}

# True until close() has marked the object as closed.
sub opened
{
    my $self     = shift ;
    return ! *$self->{Closed} ;
}

# Forward to autoflush on the underlying filehandle; undef when there is
# no filehandle.
sub autoflush
{
    my $self     = shift ;
    return defined *$self->{FH}
            ? *$self->{FH}->autoflush(@_)
            : undef ;
}

# Return the current line number. When an argument is supplied, LineNo
# and $. are set to it and the previous value is returned.
sub input_line_number
{
    my $self = shift ;
    my $last = *$self->{LineNo};

    # $self has already been shifted off, so the new value is $_[0].
    $. = *$self->{LineNo} = $_[0] if @_ ;
    return $last;
}

# Upper-case aliases for the tied-handle interface.
*BINMODE  = \&binmode;
*SEEK     = \&seek;
*READ     = \&read;
*sysread  = \&read;
*TELL     = \&tell;
*EOF      = \&eof;

*FILENO   = \&fileno;
*CLOSE    = \&close;

# Build a stub that croaks when an output operation is attempted on this
# input-only object.
sub _notAvailable
{
    my $name = shift ;
    return sub { croak "$name Not Available: File opened only for intput" ; } ;
}

*print    = _notAvailable('print');
*PRINT    = _notAvailable('print');
*printf   = _notAvailable('printf');
*PRINTF   = _notAvailable('printf');
*write    = _notAvailable('write');
*WRITE    = _notAvailable('write');

#*sysread  = \&read;
#*syswrite = \&_notAvailable;


package IO::Uncompress::Base ;


1 ;
__END__

=head1 NAME

IO::Uncompress::Base - Base Class for IO::Uncompress modules

=head1 SYNOPSIS

    use IO::Uncompress::Base ;

=head1 DESCRIPTION

This module is not intended for direct use in application code. Its sole
purpose is to be sub-classed by IO::Uncompress modules.

=head1 SEE ALSO

L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L

L

L, L, L, L

=head1 AUTHOR

This module was written by Paul Marquess, F.

=head1 MODIFICATION HISTORY

See the Changes file.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2005-2013 Paul Marquess. All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
Inflate.pm000064400000066642147634506620006517 0ustar00package IO::Uncompress::Inflate ;
# for RFC1950

use strict ;
use warnings;
use bytes;

use IO::Compress::Base::Common  2.061 qw(:Status );
use IO::Compress::Zlib::Constants 2.061 ;

use IO::Uncompress::RawInflate  2.061 ;

require Exporter ;
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $InflateError);

$VERSION = '2.061';
$InflateError = '';

@ISA    = qw( Exporter IO::Uncompress::RawInflate );
@EXPORT_OK = qw( $InflateError inflate ) ;
%EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');


# Constructor: create the self-tied object bound to $InflateError and
# hand the remaining arguments to _create.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$InflateError);

    $obj->_create(undef, 0, @_);
}

# Functional interface: create an anonymous object and run _inf with the
# caller's arguments.
sub inflate
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$InflateError);
    return $obj->_inf(@_);
}

# This class adds no constructor parameters of its own.
sub getExtraParams
{
    return ();
}

# Force the 'adler32' option on.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # inflate always needs adler32
    $got->setValue('adler32' => 1);

    return 1;
}

# Read ZLIB_HEADER_SIZE bytes and check that they form a zlib header.
# Returns the bytes read on success. The bytes are also stored in
# HeaderPending whether or not the check succeeds.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, ZLIB_HEADER_SIZE);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Header size is " .
                                        ZLIB_HEADER_SIZE . " bytes")
        if length $magic != ZLIB_HEADER_SIZE;

    #return $self->HeaderError("CRC mismatch.")
    return undef
        if ! $self->isZlibMagic($magic) ;

    *$self->{Type} = 'rfc1950';
    return $magic;
}

# Parse the header whose first bytes were returned by ckMagic.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    return $self->_readDeflateHeader($magic) ;
}

# Record the ADLER32 value from the trailer in Info; in Strict mode it
# must match the checksum reported by the Uncomp object.
sub chkTrailer
{
    my $self = shift;
    my $trailer = shift;

    my $ADLER32 = unpack("N", $trailer) ;
    *$self->{Info}{ADLER32} = $ADLER32;
    return $self->TrailerError("CRC mismatch")
        if *$self->{Strict} && $ADLER32 != *$self->{Uncomp}->adler32() ;

    return STATUS_OK;
}

# Validate the two zlib header bytes: the 16-bit big-endian value must be
# a multiple of 31, CM must be deflate and CINFO must not exceed
# ZLIB_CMF_CINFO_MAX. Returns 1 when valid, 0 when the buffer is too
# short, otherwise the result of HeaderError.
sub isZlibMagic
{
    my $self = shift;
    my $buffer = shift ;

    return 0
        if length $buffer < ZLIB_HEADER_SIZE ;

    my $hdr = unpack("n", $buffer) ;
    #return 0 if $hdr % 31 != 0 ;
    return $self->HeaderError("CRC mismatch.")
        if $hdr % 31 != 0 ;

    my ($CMF, $FLG) = unpack "C C", $buffer;
    my $cm =    bits($CMF, ZLIB_CMF_CM_OFFSET,    ZLIB_CMF_CM_BITS) ;

    # Only Deflate supported
    return $self->HeaderError("Not Deflate (CM is $cm)")
        if $cm != ZLIB_CMF_CM_DEFLATED ;

    # Max window value is 7 for Deflate.
    my $cinfo = bits($CMF, ZLIB_CMF_CINFO_OFFSET, ZLIB_CMF_CINFO_BITS) ;
    return $self->HeaderError("CINFO > " . ZLIB_CMF_CINFO_MAX .
                              " (CINFO is $cinfo)")
        if $cinfo > ZLIB_CMF_CINFO_MAX ;

    return 1;
}

# Extract a bit field: shift $data right by $offset and apply $mask
# (result limited to 8 bits).
sub bits
{
    my $data   = shift ;
    my $offset = shift ;
    my $mask  = shift ;

    ($data >> $offset ) & $mask & 0xFF ;
}

# Decode the CMF/FLG bytes in $buffer, read the 4-byte DICTID when the
# FDICT flag is set, and return a hash reference describing the header.
# Returns the result of HeaderError/TruncatedHeader on failure.
sub _readDeflateHeader
{
    my ($self, $buffer) = @_ ;

#    if (! $buffer) {
#        $self->smartReadExact(\$buffer, ZLIB_HEADER_SIZE);
#
#        *$self->{HeaderPending} = $buffer ;
#
#        return $self->HeaderError("Header size is " .
#                                            ZLIB_HEADER_SIZE . " bytes")
#            if length $buffer != ZLIB_HEADER_SIZE;
#
#        return $self->HeaderError("CRC mismatch.")
#            if ! isZlibMagic($buffer) ;
#    }

    my ($CMF, $FLG) = unpack "C C", $buffer;
    my $FDICT = bits($FLG, ZLIB_FLG_FDICT_OFFSET,  ZLIB_FLG_FDICT_BITS );

    my $cm = bits($CMF, ZLIB_CMF_CM_OFFSET, ZLIB_CMF_CM_BITS) ;
    $cm == ZLIB_CMF_CM_DEFLATED
        or return $self->HeaderError("Not Deflate (CM is $cm)") ;

    my $DICTID;
    if ($FDICT) {
        $self->smartReadExact(\$buffer, ZLIB_FDICT_SIZE)
            or return $self->TruncatedHeader("FDICT");

        $DICTID = unpack("N", $buffer) ;
    }

    *$self->{Type} = 'rfc1950';

    # NOTE(review): when FDICT is set, $buffer has just been handed to
    # smartReadExact for the DICTID bytes, so 'Header' may no longer hold
    # the two CMF/FLG bytes - confirm against smartReadExact's semantics.
    return {
        'Type'          => 'rfc1950',
        'FingerprintLength'  => ZLIB_HEADER_SIZE,
        'HeaderLength'  => ZLIB_HEADER_SIZE,
        'TrailerLength' => ZLIB_TRAILER_SIZE,
        'Header'        => $buffer,

        CMF     =>      $CMF                                               ,
        CM      => bits($CMF, ZLIB_CMF_CM_OFFSET,     ZLIB_CMF_CM_BITS    ),
        CINFO   => bits($CMF, ZLIB_CMF_CINFO_OFFSET,  ZLIB_CMF_CINFO_BITS ),
        FLG     =>      $FLG                                               ,
        FCHECK  => bits($FLG, ZLIB_FLG_FCHECK_OFFSET, ZLIB_FLG_FCHECK_BITS),
        FDICT   => bits($FLG, ZLIB_FLG_FDICT_OFFSET,  ZLIB_FLG_FDICT_BITS ),
        FLEVEL  => bits($FLG, ZLIB_FLG_LEVEL_OFFSET,  ZLIB_FLG_LEVEL_BITS ),
        DICTID  =>      $DICTID                                            ,

    };
}




1 ;

__END__


=head1 NAME

IO::Uncompress::Inflate - Read RFC 1950 files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Inflate qw(inflate $InflateError) ;

    my $status = inflate $input => $output [,OPTS]
        or die "inflate failed: $InflateError\n";

    my $z = new IO::Uncompress::Inflate $input [OPTS]
        or die "inflate failed: $InflateError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $InflateError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows
the reading of files/buffers that conform to RFC 1950. For writing RFC 1950 files/buffers, see the companion module IO::Compress::Deflate. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::Inflate qw(inflate $InflateError) ; inflate $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "inflate failed: $InflateError\n"; The functional interface needs Perl5.005 or better. =head2 inflate $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. 
See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. 
=over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. 
=item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::Inflate qw(inflate $InflateError) ; my $input = "file1.txt.1950"; my $output = "file1.txt"; inflate $input => $output or die "inflate failed: $InflateError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. 
use strict ; use warnings ; use IO::Uncompress::Inflate qw(inflate $InflateError) ; use IO::File ; my $input = new IO::File "<file1.txt.1950" or die "Cannot open 'file1.txt.1950': $!\n" ; my $buffer ; inflate $input => \$buffer or die "inflate failed: $InflateError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.1950" and store the uncompressed data in the same directory use strict ; use warnings ; use IO::Uncompress::Inflate qw(inflate $InflateError) ; inflate '</my/home/*.txt.1950>' => '</my/home/#1.txt>' or die "inflate failed: $InflateError\n"; and if you want to uncompress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::Inflate qw(inflate $InflateError) ; for my $input ( glob "/my/home/*.txt.1950" ) { my $output = $input; $output =~ s/.1950// ; inflate $input => $output or die "Error compressing '$input': $InflateError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::Inflate is shown below my $z = new IO::Uncompress::Inflate $input [OPTS] or die "IO::Uncompress::Inflate failed: $InflateError\n"; Returns an C<IO::Uncompress::Inflate> object on success and undef on failure. The variable C<$InflateError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::Inflate can be used exactly like an L<IO::File|IO::File> filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. 
=item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::Inflate object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::Inflate will read it in blocks of C<$num> bytes. 
This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C<read> method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C<read> method. If set to 0, the contents of the output parameter of the C<read> method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. =over 5 =item 1 The ADLER32 checksum field must be present. =item 2 The value of the ADLER32 field read must match the adler32 value of the uncompressed data actually contained in the file. =back =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C<BlockSize> option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C<Append> parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. 
=head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C<read> method and the previous one, is that this one will attempt to return I<exactly> C<$length> bytes. The only circumstances in which this function will not do so are when end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list of hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C<seek> in this module does not provide true random access to a compressed file/buffer. 
It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C<seek>. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>. B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C<EXPR> is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C<fileno> will return the underlying file descriptor. Once the C<close> method is called C<fileno> will return C<undef>. If the C<$z> object is associated with a buffer, this method will return C<undef>. =head2 close $z->close() ; close $z ; Closes the input file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::Inflate object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). 
The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::Inflate object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::Inflate at present. 
=over 5

=item :all

Imports C<inflate> and C<$InflateError>.
Same as doing this

    use IO::Uncompress::Inflate qw(inflate $InflateError) ;

=back

=head1 EXAMPLES

=head2 Working with Net::FTP

See L

=head1 SEE ALSO

L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L

L

L, L, L, L

For RFC 1950, 1951 and 1952 see F, F and F

The I<zlib> compression library was written by Jean-loup Gailly F and Mark Adler F.

The primary site for the I<zlib> compression library is F.

The primary site for gzip is F.

=head1 AUTHOR

This module was written by Paul Marquess, F.

=head1 MODIFICATION HISTORY

See the Changes file.

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2005-2013 Paul Marquess. All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

Unzip.pm000064400000150263147634506620006233 0ustar00

=cut

# The archive member header above is deliberately left inside the POD region
# so that it is never parsed as Perl code.

package IO::Uncompress::Unzip;

require 5.006 ;

# for RFC1952

use strict ;
use warnings;
#use bytes;

use IO::File;
use Fcntl qw(SEEK_SET SEEK_CUR SEEK_END);
use POSIX qw(mktime);

use IO::Uncompress::RawInflate  2.061 ;
use IO::Compress::Base::Common  2.061 qw(:Status );
use IO::Uncompress::Adapter::Inflate  2.061 ;
use IO::Uncompress::Adapter::Identity 2.061 ;
use IO::Compress::Zlib::Extra 2.061 ;
use IO::Compress::Zip::Constants 2.061 ;

use Compress::Raw::Zlib  2.061 () ;

BEGIN {
    # Optional back-ends: silently absent when the adapter is not installed.
    # _readZipHeader checks the adapter's $VERSION before using it.
    eval {
        require IO::Uncompress::Adapter::Bunzip2 ;
        IO::Uncompress::Adapter::Bunzip2->import() ;
    } ;
    eval {
        require IO::Uncompress::Adapter::UnLzma ;
        IO::Uncompress::Adapter::UnLzma->import() ;
    } ;
}

require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);

$VERSION = '2.061';
$UnzipError = '';

@ISA         = qw(Exporter IO::Uncompress::RawInflate);
@EXPORT_OK   = qw( $UnzipError unzip );
%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Dispatch table used by chkTrailer: record signature => routine that skips
# that record.  Plain commas (not =>) so the constants are evaluated.
%headerLookup = (
    ZIP_CENTRAL_HDR_SIG,           \&skipCentralDirectory,
    ZIP_END_CENTRAL_HDR_SIG,       \&skipEndCentralDirectory,
    ZIP64_END_CENTRAL_REC_HDR_SIG, \&skipCentralDirectory64Rec,
    ZIP64_END_CENTRAL_LOC_HDR_SIG, \&skipCentralDirectory64Loc,
    ZIP64_ARCHIVE_EXTRA_SIG,       \&skipArchiveExtra,
    ZIP64_DIGITAL_SIGNATURE_SIG,   \&skipDigitalSignature,
);

# Constructor: builds a self-tied object bound to $UnzipError and hands the
# remaining arguments to _create.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$UnzipError);
    $obj->_create(undef, 0, @_);
}

# Functional interface: one-shot uncompression, delegated to _inf.
sub unzip
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$UnzipError);
    return $obj->_inf(@_) ;
}

# Extra constructor options understood by this class.
sub getExtraParams
{
    return (
        # Zip header fields
        'name'   => [IO::Compress::Base::Common::Parse_any,     undef],

        'stream' => [IO::Compress::Base::Common::Parse_boolean, 0],

        # TODO - This means reading the central directory to get
        # 1. the local header offsets
        # 2. The compressed data length
    );
}

# Post-process parsed options: force crc32 on and remember the requested
# member name.  Always returns 1.
sub ckParams
{
    my $self = shift ;
    my $got  = shift ;

    # unzip always needs crc32
    $got->setValue('crc32' => 1);

    *$self->{UnzipData}{Name} = $got->getValue('name');

    return 1;
}

# Check the magic and read the first wanted local header into {Info}.
# Returns 1 on success, 0 when the magic check fails, undef when the header
# cannot be read.
sub mkUncomp
{
    my $self = shift ;
    my $got  = shift ;

    my $magic = $self->ckMagic()
        or return 0;

    *$self->{Info} = $self->readHeader($magic)
        or return undef ;

    return 1;
}

# Read the 4-byte signature and verify it is a local file header signature.
# Returns the 4 bytes read, or the result of HeaderError on failure.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              4 . " bytes")
        if length $magic != 4 ;

    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    *$self->{Type} = 'zip';

    return $magic ;
}

# Skip $offset bytes of input by reading and discarding them in chunks of at
# most 16 KiB.  Returns 1 on success, 0 if a read comes up short.
sub fastForward
{
    my $self   = shift;
    my $offset = shift;

    # TODO - if Stream isn't enabled & reading from file, use seek

    my $buffer = '';
    my $c = 1024 * 16;

    while ($offset > 0) {
        # Clamp the final chunk to the bytes remaining.  (This used to clamp
        # to `length $offset`, i.e. the number of decimal digits, which made
        # every read only a handful of bytes long.)
        $c = $offset
            if $offset < $c ;

        $offset -= $c;

        $self->smartReadExact(\$buffer, $c)
            or return 0;
    }

    return 1;
}

# Starting from the local header identified by $magic, return the header hash
# of the wanted member: the first member when no Name was requested, otherwise
# the member whose name equals it.  Non-matching members are skipped.
# Returns undef (with the error string saved) on failure.
sub readHeader
{
    my $self  = shift;
    my $magic = shift ;

    my $name = *$self->{UnzipData}{Name} ;
    my $hdr  = $self->_readZipHeader($magic) ;

    while (defined $hdr) {
        if (! defined $name || $hdr->{Name} eq $name) {
            return $hdr ;
        }

        # skip the data
        # TODO - when Stream is off, use seek
        my $buffer;
        if (*$self->{ZipData}{Streaming}) {

            # Compressed length is unknown up front, so the data has to be
            # uncompressed (and discarded) to find where it ends.
            while (1) {

                my $b;
                my $status = $self->smartRead(\$b, 1024 * 16);
                return undef
                    if $status <= 0 ;

                my $temp_buf;
                my $out;
                $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                if ($status == STATUS_ENDSTREAM) {
                    *$self->{Uncomp}->reset();
                    $self->pushBack($b)  ;
                    last;
                }
            }

            # skip the trailer
            $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }
        else {
            my $c = $hdr->{CompressedLength}->get64bit();
            $self->fastForward($c)
                or return $self->saveErrorString(undef, "Truncated file");
            $buffer = '';
        }

        $self->chkTrailer($buffer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $hdr = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$name'")
            if $self->smartEof();
    }

    return undef;
}

# Validate the member trailer and then consume any archive-level records that
# follow it.  In streaming mode $trailer holds the data descriptor; otherwise
# the values saved from the local header are used.
# Returns STATUS_OK, STATUS_EOF or STATUS_ERROR.
sub chkTrailer
{
    my $self    = shift;
    my $trailer = shift;

    my ($sig, $CRC32, $cSize, $uSize) ;
    if (*$self->{ZipData}{Streaming}) {
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64(substr($trailer,  8, 8));
            $uSize = U64::newUnpack_V64(substr($trailer, 16, 8));
        }
        else {
            $cSize = U64::newUnpack_V32(substr($trailer,  8, 4));
            $uSize = U64::newUnpack_V32(substr($trailer, 12, 4));
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }
    else {
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }

    *$self->{Info}{CRC32}              = *$self->{ZipData}{CRC32} ;
    *$self->{Info}{CompressedLength}   = $cSize->get64bit();
    *$self->{Info}{UncompressedLength} = $uSize->get64bit();

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32 != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    # check for central directory or end of central directory
    while (1) {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        elsif ($got < 0) {
            return STATUS_ERROR ;
        }
        elsif ($got < 4) {
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }

        my $sig = unpack("V", $magic) ;

        if (my $handler = $headerLookup{$sig}) {
            if ($handler->($self, $magic) != STATUS_OK ) {
                return STATUS_ERROR
                    if *$self->{Strict} ;

                $self->clearError();
                return STATUS_OK ;
            }

            return STATUS_OK
                if $sig == ZIP_END_CENTRAL_HDR_SIG ;
        }
        elsif ($sig == ZIP_LOCAL_HDR_SIG) {
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }
        else {
            # put the data back
            $self->pushBack($magic)  ;
            last;
        }
    }

    # An unrecognised signature followed the member.
    return STATUS_ERROR ;
}

# Skip one central directory file header (46 fixed bytes plus the variable
# length filename, extra field and comment).  $magic is the signature that
# has already been read.
sub skipCentralDirectory
{
    my $self  = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                      46 . " bytes") ;

    *$self->{HeaderPending} = $magic . $buffer ;

    # Offsets are relative to the start of the record; $buffer starts after
    # the 4-byte signature, hence the "-4".
    #my $versionMadeBy      = unpack ("v", substr($buffer,  4-4, 2));
    #my $extractVersion     = unpack ("v", substr($buffer,  6-4, 2));
    #my $gpFlag             = unpack ("v", substr($buffer,  8-4, 2));
    #my $compressedMethod   = unpack ("v", substr($buffer, 10-4, 2));
    #my $lastModTime        = unpack ("V", substr($buffer, 12-4, 4));
    #my $crc32              = unpack ("V", substr($buffer, 16-4, 4));
    #my $compressedLength   = unpack ("V", substr($buffer, 20-4, 4));
    #my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4));
    my $filename_length     = unpack ("v", substr($buffer, 28-4, 2));
    my $extra_length        = unpack ("v", substr($buffer, 30-4, 2));
    my $comment_length      = unpack ("v", substr($buffer, 32-4, 2));
    #my $disk_start         = unpack ("v", substr($buffer, 34-4, 2));
    #my $int_file_attrib    = unpack ("v", substr($buffer, 36-4, 2));
    #my $ext_file_attrib    = unpack ("V", substr($buffer, 38-4, 2));
    #my $lcl_hdr_offset     = unpack ("V", substr($buffer, 42-4, 2));

    my $filename;
    my $extraField;
    my $comment ;
    if ($filename_length) {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedTrailer("filename");
    }

    if ($extra_length) {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedTrailer("extra");
    }

    if ($comment_length) {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
    }

    return STATUS_OK ;
}

# Skip an archive extra data record: 4-byte length followed by that many
# bytes.
sub skipArchiveExtra
{
    my $self  = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 4)
        or return $self->TrailerError("Minimum header size is " .
                                      4 . " bytes") ;

    my $keep = $magic . $buffer ;

    my $size = unpack ("V", $buffer);

    $self->smartReadExact(\$buffer, $size)
        or return $self->TrailerError("Minimum header size is " .
                                      $size . " bytes") ;

    $keep .= $buffer ;
    *$self->{HeaderPending} = $keep ;

    return STATUS_OK ;
}

# Skip a Zip64 end of central directory record: 8-byte little-endian size
# followed by that many bytes.
sub skipCentralDirectory64Rec
{
    my $self  = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 8)
        or return $self->TrailerError("Minimum header size is " .
                                      8 . " bytes") ;

    my ($sizeLo, $sizeHi) = unpack ("V V", $buffer);

    # The high word is worth 2**32.  U64::MAX32 is 0xFFFFFFFF, so the
    # multiplier has to be MAX32 + 1 (it used to be MAX32 itself).
    my $size = $sizeHi * (U64::MAX32 + 1) + $sizeLo;

    $self->fastForward($size)
        or return $self->TrailerError("Minimum header size is " .
                                      $size . " bytes") ;

    # Layout of the skipped record body:
    #my $versionMadeBy      = unpack ("v",   substr($buffer,  0, 2));
    #my $extractVersion     = unpack ("v",   substr($buffer,  2, 2));
    #my $diskNumber         = unpack ("V",   substr($buffer,  4, 4));
    #my $cntrlDirDiskNo     = unpack ("V",   substr($buffer,  8, 4));
    #my $entriesInThisCD    = unpack ("V V", substr($buffer, 12, 8));
    #my $entriesInCD        = unpack ("V V", substr($buffer, 20, 8));
    #my $sizeOfCD           = unpack ("V V", substr($buffer, 28, 8));
    #my $offsetToCD         = unpack ("V V", substr($buffer, 36, 8));

    return STATUS_OK ;
}

# Skip a Zip64 end of central directory locator (fixed 20 bytes).
sub skipCentralDirectory64Loc
{
    my $self  = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 20 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                      20 . " bytes") ;

    *$self->{HeaderPending} = $magic . $buffer ;

    #my $startCdDisk        = unpack ("V",   substr($buffer,  4-4, 4));
    #my $offsetToCD         = unpack ("V V", substr($buffer,  8-4, 8));
    #my $diskCount          = unpack ("V",   substr($buffer, 16-4, 4));

    return STATUS_OK ;
}

# Skip the end of central directory record (22 fixed bytes plus comment).
sub skipEndCentralDirectory
{
    my $self  = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                      22 . " bytes") ;

    *$self->{HeaderPending} = $magic . $buffer ;

    #my $diskNumber         = unpack ("v", substr($buffer,  4-4, 2));
    #my $cntrlDirDiskNo     = unpack ("v", substr($buffer,  6-4, 2));
    #my $entriesInThisCD    = unpack ("v", substr($buffer,  8-4, 2));
    #my $entriesInCD        = unpack ("v", substr($buffer, 10-4, 2));
    #my $sizeOfCD           = unpack ("V", substr($buffer, 12-4, 4));
    #my $offsetToCD         = unpack ("V", substr($buffer, 16-4, 4));
    my $comment_length      = unpack ("v", substr($buffer, 20-4, 2));

    my $comment ;
    if ($comment_length) {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
    }

    return STATUS_OK ;
}

# True when $buffer starts with the local file header signature.
sub _isZipMagic
{
    my $buffer = shift ;
    return 0 if length $buffer < 4 ;
    my $sig = unpack("V", $buffer) ;
    return $sig == ZIP_LOCAL_HDR_SIG ;
}

# Read the signature and then the rest of a local file header.
# Returns the header hash, or the result of HeaderError on failure.
sub _readFullZipHeader
{
    my ($self) = @_ ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              30 . " bytes")
        if length $magic != 4 ;

    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    my $status = $self->_readZipHeader($magic);
    delete *$self->{Transparent} if ! defined $status ;
    return $status ;
}

# Parse a local file header whose 4-byte signature ($magic) has already been
# consumed, set up the matching uncompression adapter in {Uncomp}, and return
# a hash reference describing the member.  Returns the result of
# HeaderError/TruncatedHeader/saveErrorString on failure.
sub _readZipHeader
{
    my ($self, $magic) = @_ ;
    my $buffer = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # Offsets are relative to the start of the record; $buffer starts after
    # the 4-byte signature, hence the "-4".
    my $extractVersion     = unpack ("v", substr($buffer,  4-4, 2));
    my $gpFlag             = unpack ("v", substr($buffer,  6-4, 2));
    my $compressedMethod   = unpack ("v", substr($buffer,  8-4, 2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32(substr($buffer, 18-4, 4));
    my $uncompressedLength = U64::newUnpack_V32(substr($buffer, 22-4, 4));
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();
    my $streamingMode = ($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ? 1 : 0 ;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;

    if ($filename_length) {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");
        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length) {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                           \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        # Index the parsed sub-fields by ID (value is a ref to the payload).
        my %Extra ;
        for my $field (@EXTRA) {
            $Extra{ $field->[0] } = \$field->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()}) {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                if (U64::full32($uncompressedLength->get32bit())) {
                    $uncompressedLength
                            = U64::newUnpack_V64(substr($buff, 0, 8));

                    $offset += 8 ;
                }

                if (U64::full32($compressedLength->get32bit())) {

                    $compressedLength
                        = U64::newUnpack_V64(substr($buff, $offset, 8));

                    $offset += 8 ;
                }
            }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        *$self->{ZipData}{Streaming}       = 0;
        *$self->{ZipData}{Crc32}           = $crc32;
        *$self->{ZipData}{CompressedLen}   = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    # Running CRC of the uncompressed data (see filterUncompressed); note
    # this is {CRC32}, distinct from the header value saved in {Crc32}.
    *$self->{ZipData}{CRC32}  = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;

    if ($compressedMethod == ZIP_CM_DEFLATE) {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2) {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA) {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';

        # 4-byte prefix: two version bytes, then a 16-bit properties size.
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
            or return $self->saveErrorString(undef, "Truncated file");
        my $LzmaPropertiesSize = unpack ("v", substr($LzmaHeader, 2, 2));

        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
            or return $self->saveErrorString(undef, "Truncated file");

        if (! $streamingMode) {
            # The prefix and properties just read count towards the
            # member's compressed length.
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} =
                    *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE) {
        *$self->{Type} = 'zip-stored';

        my $obj =
            IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                              $zip64);

        *$self->{Uncomp} = $obj;
    }
    else {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $compressedMethod == ZIP_CM_DEFLATE
                                 ? "Deflated"
                                 : $compressedMethod == ZIP_CM_BZIP2
                                     ? "Bzip2"
                                     : $compressedMethod == ZIP_CM_LZMA
                                         ? "Lzma"
                                         : $compressedMethod == ZIP_CM_STORE
                                             ? "Stored"
                                             : "Unknown" ,

        'ExtraFieldRaw' => $extraField,
        'ExtraField'    => [ @EXTRA ],
    }
}

# Update the running CRC in {ZipData}{CRC32}.  For deflate the value is
# taken from the uncompression object; otherwise it is computed here from
# the buffer reference in $_[0].
sub filterUncompressed
{
    my $self = shift ;

    if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) {
        *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
    }
    else {
        *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}, $_[1]);
    }
}

# from Archive::Zip & info-zip
#
# Convert a packed 32-bit DOS date/time into a value from mktime.
# Returns 0 when mktime cannot represent the time.
sub _dosToUnixTime
{
    my $dt = shift;

    my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
    my $mday = ( ( $dt >> 16 ) & 0x1f );

    my $hour = ( ( $dt >> 11 ) & 0x1f );
    my $min  = ( ( $dt >> 5 )  & 0x3f );
    my $sec  = ( ( $dt << 1 )  & 0x3e );

    my $time_t = mktime( $sec, $min, $hour, $mday, $mon, $year, 0, 0, -1 );
    return 0 if ! defined $time_t;
    return $time_t;
}

# Seek forward $size bytes from the current position.  $size may be a plain
# number or a U64 object.
sub skip
{
    my $self = shift;
    my $size = shift;

    my $bytes = ref $size eq 'U64' ? $size->get64bit() : $size ;

    # Same (offset, 0, whence) argument order as every other smartSeek call
    # in this file; SEEK_CUR used to be passed in the second slot.
    # NOTE(review): confirm against smartSeek in the base class.
    $self->smartSeek($bytes, 0, SEEK_CUR);
}

# Walk the central directory and return the compressed length of every
# entry as a list of U64 objects (empty list when no central directory is
# found).  The read position is restored afterwards.
sub scanCentralDirectory
{
    my $self = shift;

    my $here = $self->tell();

    # Use cases
    # 1 32-bit CD
    # 2 64-bit CD

    my @CD = ();
    my $offset = $self->findCentralDirectoryOffset();

    return ()
        if ! defined $offset;

    # (was misspelt "smarkSeek", which is not a method of this class)
    $self->smartSeek($offset, 0, SEEK_SET) ;

    # Now walk the Central Directory Records
    my $buffer ;
    while ($self->smartReadExact(\$buffer, 46) &&
           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {

        my $compressedLength   = unpack("V", substr($buffer, 20, 4));
        my $uncompressedLength = unpack("V", substr($buffer, 24, 4));
        my $filename_length    = unpack("v", substr($buffer, 28, 2));
        my $extra_length       = unpack("v", substr($buffer, 30, 2));
        my $comment_length     = unpack("v", substr($buffer, 32, 2));

        $self->skip($filename_length ) ;

        my $v64 = U64->new($compressedLength) ;

        if (U64::full32($compressedLength)) {
            $self->smartReadExact(\$buffer, $extra_length) ;
            die "xxx $offset $comment_length $filename_length $extra_length" . length($buffer)
                if length($buffer) != $extra_length;
            my $got = $self->get64Extra($buffer, U64::full32($uncompressedLength));

            # If not Zip64 extra field, assume size is 0xFFFFFFFF
            $v64 = $got if defined $got;
        }
        else {
            $self->skip($extra_length) ;
        }

        $self->skip($comment_length ) ;

        push @CD, $v64 ;
    }

    $self->smartSeek($here, 0, SEEK_SET) ;

    return @CD;
}

# Look up the Zip64 extra field (ID 0x0001) in $buffer and return a 64-bit
# value from it as a U64: the one at offset 8 when $is_uncomp is true, else
# the one at offset 0.  Returns undef when the field is absent.
sub get64Extra
{
    my $self      = shift ;
    my $buffer    = shift;
    my $is_uncomp = shift ;

    my $extra = IO::Compress::Zlib::Extra::findID(0x0001, $buffer);

    return undef
        if ! defined $extra ;

    return U64::newUnpack_V64(substr($extra, $is_uncomp ? 8 : 0)) ;
}

# Given the offset ($here) of the end of central directory record, read the
# Zip64 locator that precedes it, follow it to the Zip64 end of central
# directory record and return the central directory offset stored there.
# Dies when the expected records are not found.
sub offsetFromZip64
{
    my $self = shift ;
    my $here = shift;

    $self->smartSeek($here - 20, 0, SEEK_SET)
        or die "xx $!" ;

    my $buffer;
    my $got = 0;
    $self->smartReadExact(\$buffer, 20)
        or die "xxx $here $got $!" ;

    if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_LOC_HDR_SIG ) {
        my $cd64 = U64::Value_VV64(substr($buffer, 8, 8));

        $self->smartSeek($cd64, 0, SEEK_SET) ;

        $self->smartReadExact(\$buffer, 4)
            or die "xxx" ;

        if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_REC_HDR_SIG ) {

            $self->smartReadExact(\$buffer, 8)
                or die "xxx" ;
            my $size = U64::Value_VV64($buffer);
            $self->smartReadExact(\$buffer, $size)
                or die "xxx" ;

            return U64::Value_VV64(substr($buffer, 36, 8)) ;
        }

        die "zzz";
    }

    die "zzz";
}

use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);

# Locate the end of central directory record and return the central
# directory offset it holds (resolved through the Zip64 records when the
# 32-bit field is saturated).  Returns undef when no record is found.
sub findCentralDirectoryOffset
{
    my $self = shift ;

    # Most common use-case is where there is no comment, so
    # know exactly where the end of central directory record
    # should be.

    $self->smartSeek(-22, 0, SEEK_END) ;
    my $here = $self->tell();

    my $buffer;
    $self->smartReadExact(\$buffer, 22)
        or die "xxx" ;

    my $centralDirOffset ;
    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
    }
    else {
        # A trailing comment is present: search backwards from the end of
        # the file, widening the window by 1 KiB per pass.
        $self->smartSeek(0, 0, SEEK_END) ;

        my $fileLen = $self->tell();
        my $want = 0 ;

        while (1) {
            $want += 1024;
            my $seekTo = $fileLen - $want;
            if ($seekTo < 0 ) {
                $seekTo = 0;
                $want = $fileLen ;
            }
            $self->smartSeek( $seekTo, 0, SEEK_SET)
                or die "xxx $!" ;

            # Pass a reference, as every other smartReadExact call does
            # (the scalar itself used to be passed here).
            $self->smartReadExact(\$buffer, $want)
                or die "xxx " ;
            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);

            if ($pos >= 0) {
                $here = $seekTo + $pos ;
                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
                last ;
            }

            return undef
                if $want == $fileLen;
        }
    }

    $centralDirOffset = $self->offsetFromZip64($here)
        if U64::full32($centralDirOffset) ;

    return $centralDirOffset ;
}

1;

__END__

=head1 NAME

IO::Uncompress::Unzip - Read zip files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    my $status = unzip $input => $output [,OPTS]
        or die "unzip failed: $UnzipError\n";

    my $z = new IO::Uncompress::Unzip $input [OPTS]
        or die "unzip failed: $UnzipError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $UnzipError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
zip files/buffers.

For writing zip files/buffers, see the companion module IO::Compress::Zip.

=head1 Functional Interface

A top-level function, C<unzip>, is provided to carry out
"one-shot" uncompression between buffers and/or files. For finer
control over the uncompression process, see the L</"OO Interface">
section.

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
        or die "unzip failed: $UnzipError\n";

The functional interface needs Perl5.005 or better.
=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS] C<unzip> expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the C<$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C<unzip> will assume that it is an I<input fileglob string>. The input is the list of files that match the fileglob. See L<File::GlobMapper|File::GlobMapper> for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C<undef> will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it.
=item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C<unzip> will assume that it is an I<output fileglob string>. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is a fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L<File::GlobMapper|File::GlobMapper> for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C<undef> will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C<unzip>, C<OPTS>, are the same as those used with the OO interface defined in the L</"Constructor Options"> section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C<unzip> that are filehandles. If C<AutoClose> is specified, and the value is true, it will result in all input and/or output filehandles being closed once C<unzip> has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C<binmode> before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream.
=over 5 =item * A Buffer If C<Append> is enabled, all uncompressed data will be appended to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C<Append> is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C<Append> is enabled, the filehandle will be positioned to the end of the file via a call to C<seek> before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C<Append> is specified, and set to true, it will I<append> all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C<Append> is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C<trailingData> will return everything from the end of the compressed data stream to the end of the buffer.
If the input is a filehandle, C<trailingData> will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C<trailingData> if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C<trailingData> by setting the C<InputLength> option. =back =head2 Examples Say you have a zip file, C<file1.zip>, that only contains a single member, you can read it and write the uncompressed data to the file C<file1.txt> like this. use strict ; use warnings ; use IO::Uncompress::Unzip qw(unzip $UnzipError) ; my $input = "file1.zip"; my $output = "file1.txt"; unzip $input => $output or die "unzip failed: $UnzipError\n"; If you have a zip file that contains multiple members and want to read a specific member from the file, say C<"data1">, use the C<Name> option use strict ; use warnings ; use IO::Uncompress::Unzip qw(unzip $UnzipError) ; my $input = "file1.zip"; my $output = "file1.txt"; unzip $input => $output, Name => "data1" or die "unzip failed: $UnzipError\n"; Alternatively, if you want to read the C<"data1"> member into memory, use a scalar reference for the C<output> parameter. use strict ; use warnings ; use IO::Uncompress::Unzip qw(unzip $UnzipError) ; my $input = "file1.zip"; my $output ; unzip $input => \$output, Name => "data1" or die "unzip failed: $UnzipError\n"; # $output now contains the uncompressed data To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>.
use strict ; use warnings ; use IO::Uncompress::Unzip qw(unzip $UnzipError) ; use IO::File ; my $input = new IO::File "<file1.zip" or die "Cannot open 'file1.zip': $!\n" ; my $buffer ; unzip $input => \$buffer or die "unzip failed: $UnzipError\n"; =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::Unzip is shown below my $z = new IO::Uncompress::Unzip $input [OPTS] or die "IO::Uncompress::Unzip failed: $UnzipError\n"; Returns an C<IO::Uncompress::Unzip> object on success and undef on failure. The variable C<$UnzipError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< Name => "membername" >> Open "membername" from the zip file for reading. =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle.
If specified, and the value is true, it will result in the file being closed once either the C<close> method is called or the IO::Uncompress::Unzip object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Treats the complete zip file/buffer as a single compressed data stream. When reading in multi-stream mode each member of the zip file/buffer will be uncompressed in turn until the end of the file/buffer is encountered. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I<primed> with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::Unzip will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off.
=item C<< Append => 0|1 >> This option controls what the C<read> method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C<read> method. If set to 0, the contents of the output parameter of the C<read> method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C<BlockSize> option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C<Append> parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C<read> method and the previous one, is that this one will attempt to return I<exactly> C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to determine what constitutes an end of line.
Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list of hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C<seek> functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C<seek> in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C<seek>. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one of the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to an opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C<EXPR> is present, and is non-zero, it will enable flushing after every write/print operation.
If C<$z> is associated with a buffer, this method has no effect and always returns C<undef>. B<Note> that the special variable C<$|> B<cannot> be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C<EXPR> is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C<fileno> will return the underlying file descriptor. Once the C<close> method is called C<fileno> will return C<undef>. If the C<$z> object is associated with a buffer, this method will return C<undef>. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::Unzip object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C<close> method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C<close> explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered.
=head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C<trailingData> will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C<trailingData> will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C<trailingData> if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C<trailingData> by setting the C<InputLength> option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::Unzip at present. =over 5 =item :all Imports C<unzip> and C<$UnzipError>. Same as doing this use IO::Uncompress::Unzip qw(unzip $UnzipError) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L<IO::Compress::FAQ> =head2 Walking through a zip file The code below can be used to traverse a zip file, one compressed data stream at a time. use IO::Uncompress::Unzip qw($UnzipError); my $zipfile = "somefile.zip"; my $u = new IO::Uncompress::Unzip $zipfile or die "Cannot open $zipfile: $UnzipError"; my $status; for ($status = 1; $status > 0; $status = $u->nextStream()) { my $name = $u->getHeaderInfo()->{Name}; warn "Processing member $name\n" ; my $buff; while (($status = $u->read($buff)) > 0) { # Do something here } last if $status < 0; } die "Error processing $zipfile: $!\n" if $status < 0 ; Each individual compressed data stream is read until the logical end-of-file is reached. Then C<nextStream> is called.
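This will skip to the start of the next compressed data stream and clear the end-of-file flag. It is also worth noting that C<nextStream> can be called at any time -- you don't have to wait until you have exhausted a compressed data stream before skipping to the next one. =head2 Unzipping a complete zip file to disk Daniel S. Sterling has written a script that uses C<IO::Uncompress::Unzip> to read a zip file and unzip its contents to disk. The script is available from L<https://gist.github.com/eqhmcow/5389877> =head1 SEE ALSO L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress> L<IO::Compress::FAQ|IO::Compress::FAQ> L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>, L<Archive::Tar|Archive::Tar>, L<IO::Zlib|IO::Zlib> For RFC 1950, 1951 and 1952 see F<http://www.faqs.org/rfcs/rfc1950.html>, F<http://www.faqs.org/rfcs/rfc1951.html> and F<http://www.faqs.org/rfcs/rfc1952.html> The I<zlib> compression library was written by Jean-loup Gailly F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>. The primary site for the I<zlib> compression library is F<http://www.zlib.org>. The primary site for gzip is F<http://www.gzip.org>. =head1 AUTHOR This module was written by Paul Marquess, F<pmqs@cpan.org>. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.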