eaiovnaovbqoebvqoeavibavo UserAgent.pm000064400000005350147633770250007020 0ustar00# -*- Mode: cperl; coding: utf-8; cperl-indent-level: 4 -*- # vim: ts=4 sts=4 sw=4: package CPAN::LWP::UserAgent; use strict; use vars qw(@ISA $USER $PASSWD $SETUPDONE); use CPAN::HTTP::Credentials; # we delay requiring LWP::UserAgent and setting up inheritance until we need it $CPAN::LWP::UserAgent::VERSION = $CPAN::LWP::UserAgent::VERSION = "1.9600"; sub config { return if $SETUPDONE; if ($CPAN::META->has_usable('LWP::UserAgent')) { require LWP::UserAgent; @ISA = qw(Exporter LWP::UserAgent); ## no critic $SETUPDONE++; } else { $CPAN::Frontend->mywarn(" LWP::UserAgent not available\n"); } } sub get_basic_credentials { my($self, $realm, $uri, $proxy) = @_; if ( $proxy ) { return CPAN::HTTP::Credentials->get_proxy_credentials(); } else { return CPAN::HTTP::Credentials->get_non_proxy_credentials(); } } sub no_proxy { my ( $self, $no_proxy ) = @_; return $self->SUPER::no_proxy( split(',',$no_proxy) ); } # mirror(): Its purpose is to deal with proxy authentication. When we # call SUPER::mirror, we relly call the mirror method in # LWP::UserAgent. LWP::UserAgent will then call # $self->get_basic_credentials or some equivalent and this will be # $self->dispatched to our own get_basic_credentials method. # Our own get_basic_credentials sets $USER and $PASSWD, two globals. # 407 stands for HTTP_PROXY_AUTHENTICATION_REQUIRED. Which means # although we have gone through our get_basic_credentials, the proxy # server refuses to connect. This could be a case where the username or # password has changed in the meantime, so I'm trying once again without # $USER and $PASSWD to give the get_basic_credentials routine another # chance to set $USER and $PASSWD. # mirror(): Its purpose is to deal with proxy authentication. When we # call SUPER::mirror, we relly call the mirror method in # LWP::UserAgent. LWP::UserAgent will then call # $self->get_basic_credentials or some equivalent and this will be # $self->dispatched to our own get_basic_credentials method. # Our own get_basic_credentials sets $USER and $PASSWD, two globals. # 407 stands for HTTP_PROXY_AUTHENTICATION_REQUIRED. Which means # although we have gone through our get_basic_credentials, the proxy # server refuses to connect. This could be a case where the username or # password has changed in the meantime, so I'm trying once again without # $USER and $PASSWD to give the get_basic_credentials routine another # chance to set $USER and $PASSWD. sub mirror { my($self,$url,$aslocal) = @_; my $result = $self->SUPER::mirror($url,$aslocal); if ($result->code == 407) { CPAN::HTTP::Credentials->clear_credentials; $result = $self->SUPER::mirror($url,$aslocal); } $result; } 1; ConnCache.pm000064400000016705147633770250006752 0ustar00package LWP::ConnCache; use strict; use vars qw($VERSION $DEBUG); $VERSION = "6.02"; sub new { my($class, %cnf) = @_; my $total_capacity = 1; if (exists $cnf{total_capacity}) { $total_capacity = delete $cnf{total_capacity}; } if (%cnf && $^W) { require Carp; Carp::carp("Unrecognised options: @{[sort keys %cnf]}") } my $self = bless { cc_conns => [] }, $class; $self->total_capacity($total_capacity); $self; } sub deposit { my($self, $type, $key, $conn) = @_; push(@{$self->{cc_conns}}, [$conn, $type, $key, time]); $self->enforce_limits($type); return; } sub withdraw { my($self, $type, $key) = @_; my $conns = $self->{cc_conns}; for my $i (0 .. @$conns - 1) { my $c = $conns->[$i]; next unless $c->[1] eq $type && $c->[2] eq $key; splice(@$conns, $i, 1); # remove it return $c->[0]; } return undef; } sub total_capacity { my $self = shift; my $old = $self->{cc_limit_total}; if (@_) { $self->{cc_limit_total} = shift; $self->enforce_limits; } $old; } sub capacity { my $self = shift; my $type = shift; my $old = $self->{cc_limit}{$type}; if (@_) { $self->{cc_limit}{$type} = shift; $self->enforce_limits($type); } $old; } sub enforce_limits { my($self, $type) = @_; my $conns = $self->{cc_conns}; my @types = $type ? ($type) : ($self->get_types); for $type (@types) { next unless $self->{cc_limit}; my $limit = $self->{cc_limit}{$type}; next unless defined $limit; for my $i (reverse 0 .. @$conns - 1) { next unless $conns->[$i][1] eq $type; if (--$limit < 0) { $self->dropping(splice(@$conns, $i, 1), "$type capacity exceeded"); } } } if (defined(my $total = $self->{cc_limit_total})) { while (@$conns > $total) { $self->dropping(shift(@$conns), "Total capacity exceeded"); } } } sub dropping { my($self, $c, $reason) = @_; print "DROPPING @$c [$reason]\n" if $DEBUG; } sub drop { my($self, $checker, $reason) = @_; if (ref($checker) ne "CODE") { # make it so if (!defined $checker) { $checker = sub { 1 }; # drop all of them } elsif (_looks_like_number($checker)) { my $age_limit = $checker; my $time_limit = time - $age_limit; $reason ||= "older than $age_limit"; $checker = sub { $_[3] < $time_limit }; } else { my $type = $checker; $reason ||= "drop $type"; $checker = sub { $_[1] eq $type }; # match on type } } $reason ||= "drop"; local $SIG{__DIE__}; # don't interfere with eval below local $@; my @c; for (@{$self->{cc_conns}}) { my $drop; eval { if (&$checker(@$_)) { $self->dropping($_, $reason); $drop++; } }; push(@c, $_) unless $drop; } @{$self->{cc_conns}} = @c; } sub prune { my $self = shift; $self->drop(sub { !shift->ping }, "ping"); } sub get_types { my $self = shift; my %t; $t{$_->[1]}++ for @{$self->{cc_conns}}; return keys %t; } sub get_connections { my($self, $type) = @_; my @c; for (@{$self->{cc_conns}}) { push(@c, $_->[0]) if !$type || ($type && $type eq $_->[1]); } @c; } sub _looks_like_number { $_[0] =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/; } 1; __END__ =head1 NAME LWP::ConnCache - Connection cache manager =head1 NOTE This module is experimental. Details of its interface is likely to change in the future. =head1 SYNOPSIS use LWP::ConnCache; my $cache = LWP::ConnCache->new; $cache->deposit($type, $key, $sock); $sock = $cache->withdraw($type, $key); =head1 DESCRIPTION The C class is the standard connection cache manager for LWP::UserAgent. The following basic methods are provided: =over =item $cache = LWP::ConnCache->new( %options ) This method constructs a new C object. The only option currently accepted is 'total_capacity'. If specified it initialize the total_capacity option. It defaults to the value 1. =item $cache->total_capacity( [$num_connections] ) Get/sets the number of connection that will be cached. Connections will start to be dropped when this limit is reached. If set to C<0>, then all connections are immediately dropped. If set to C, then there is no limit. =item $cache->capacity($type, [$num_connections] ) Get/set a limit for the number of connections of the specified type that can be cached. The $type will typically be a short string like "http" or "ftp". =item $cache->drop( [$checker, [$reason]] ) Drop connections by some criteria. The $checker argument is a subroutine that is called for each connection. If the routine returns a TRUE value then the connection is dropped. The routine is called with ($conn, $type, $key, $deposit_time) as arguments. Shortcuts: If the $checker argument is absent (or C) all cached connections are dropped. If the $checker is a number then all connections untouched that the given number of seconds or more are dropped. If $checker is a string then all connections of the given type are dropped. The $reason argument is passed on to the dropped() method. =item $cache->prune Calling this method will drop all connections that are dead. This is tested by calling the ping() method on the connections. If the ping() method exists and returns a FALSE value, then the connection is dropped. =item $cache->get_types This returns all the 'type' fields used for the currently cached connections. =item $cache->get_connections( [$type] ) This returns all connection objects of the specified type. If no type is specified then all connections are returned. In scalar context the number of cached connections of the specified type is returned. =back The following methods are called by low-level protocol modules to try to save away connections and to get them back. =over =item $cache->deposit($type, $key, $conn) This method adds a new connection to the cache. As a result other already cached connections might be dropped. Multiple connections with the same $type/$key might added. =item $conn = $cache->withdraw($type, $key) This method tries to fetch back a connection that was previously deposited. If no cached connection with the specified $type/$key is found, then C is returned. There is not guarantee that a deposited connection can be withdrawn, as the cache manger is free to drop connections at any time. =back The following methods are called internally. Subclasses might want to override them. =over =item $conn->enforce_limits([$type]) This method is called with after a new connection is added (deposited) in the cache or capacity limits are adjusted. The default implementation drops connections until the specified capacity limits are not exceeded. =item $conn->dropping($conn_record, $reason) This method is called when a connection is dropped. The record belonging to the dropped connection is passed as the first argument and a string describing the reason for the drop is passed as the second argument. The default implementation makes some noise if the $LWP::ConnCache::DEBUG variable is set and nothing more. =back =head1 SUBCLASSING For specialized cache policy it makes sense to subclass C and perhaps override the deposit(), enforce_limits() and dropping() methods. The object itself is a hash. Keys prefixed with C are reserved for the base class. =head1 SEE ALSO L =head1 COPYRIGHT Copyright 2001 Gisle Aas. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Simple.pm000064400000014266147633770250006362 0ustar00package LWP::Simple; use strict; use vars qw($ua %loop_check $FULL_LWP @EXPORT @EXPORT_OK $VERSION); require Exporter; @EXPORT = qw(get head getprint getstore mirror); @EXPORT_OK = qw($ua); # I really hate this. I was a bad idea to do it in the first place. # Wonder how to get rid of it??? (It even makes LWP::Simple 7% slower # for trivial tests) use HTTP::Status; push(@EXPORT, @HTTP::Status::EXPORT); $VERSION = "6.00"; sub import { my $pkg = shift; my $callpkg = caller; Exporter::export($pkg, $callpkg, @_); } use LWP::UserAgent (); use HTTP::Status (); use HTTP::Date (); $ua = LWP::UserAgent->new; # we create a global UserAgent object $ua->agent("LWP::Simple/$VERSION "); $ua->env_proxy; sub get ($) { my $response = $ua->get(shift); return $response->decoded_content if $response->is_success; return undef; } sub head ($) { my($url) = @_; my $request = HTTP::Request->new(HEAD => $url); my $response = $ua->request($request); if ($response->is_success) { return $response unless wantarray; return (scalar $response->header('Content-Type'), scalar $response->header('Content-Length'), HTTP::Date::str2time($response->header('Last-Modified')), HTTP::Date::str2time($response->header('Expires')), scalar $response->header('Server'), ); } return; } sub getprint ($) { my($url) = @_; my $request = HTTP::Request->new(GET => $url); local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR my $callback = sub { print $_[0] }; if ($^O eq "MacOS") { $callback = sub { $_[0] =~ s/\015?\012/\n/g; print $_[0] } } my $response = $ua->request($request, $callback); unless ($response->is_success) { print STDERR $response->status_line, " \n"; } $response->code; } sub getstore ($$) { my($url, $file) = @_; my $request = HTTP::Request->new(GET => $url); my $response = $ua->request($request, $file); $response->code; } sub mirror ($$) { my($url, $file) = @_; my $response = $ua->mirror($url, $file); $response->code; } 1; __END__ =head1 NAME LWP::Simple - simple procedural interface to LWP =head1 SYNOPSIS perl -MLWP::Simple -e 'getprint "http://www.sn.no"' use LWP::Simple; $content = get("http://www.sn.no/"); die "Couldn't get it!" unless defined $content; if (mirror("http://www.sn.no/", "foo") == RC_NOT_MODIFIED) { ... } if (is_success(getprint("http://www.sn.no/"))) { ... } =head1 DESCRIPTION This module is meant for people who want a simplified view of the libwww-perl library. It should also be suitable for one-liners. If you need more control or access to the header fields in the requests sent and responses received, then you should use the full object-oriented interface provided by the C module. The following functions are provided (and exported) by this module: =over 3 =item get($url) The get() function will fetch the document identified by the given URL and return it. It returns C if it fails. The $url argument can be either a string or a reference to a URI object. You will not be able to examine the response code or response headers (like 'Content-Type') when you are accessing the web using this function. If you need that information you should use the full OO interface (see L). =item head($url) Get document headers. Returns the following 5 values if successful: ($content_type, $document_length, $modified_time, $expires, $server) Returns an empty list if it fails. In scalar context returns TRUE if successful. =item getprint($url) Get and print a document identified by a URL. The document is printed to the selected default filehandle for output (normally STDOUT) as data is received from the network. If the request fails, then the status code and message are printed on STDERR. The return value is the HTTP response code. =item getstore($url, $file) Gets a document identified by a URL and stores it in the file. The return value is the HTTP response code. =item mirror($url, $file) Get and store a document identified by a URL, using I, and checking the I. Returns the HTTP response code. =back This module also exports the HTTP::Status constants and procedures. You can use them when you check the response code from getprint(), getstore() or mirror(). The constants are: RC_CONTINUE RC_SWITCHING_PROTOCOLS RC_OK RC_CREATED RC_ACCEPTED RC_NON_AUTHORITATIVE_INFORMATION RC_NO_CONTENT RC_RESET_CONTENT RC_PARTIAL_CONTENT RC_MULTIPLE_CHOICES RC_MOVED_PERMANENTLY RC_MOVED_TEMPORARILY RC_SEE_OTHER RC_NOT_MODIFIED RC_USE_PROXY RC_BAD_REQUEST RC_UNAUTHORIZED RC_PAYMENT_REQUIRED RC_FORBIDDEN RC_NOT_FOUND RC_METHOD_NOT_ALLOWED RC_NOT_ACCEPTABLE RC_PROXY_AUTHENTICATION_REQUIRED RC_REQUEST_TIMEOUT RC_CONFLICT RC_GONE RC_LENGTH_REQUIRED RC_PRECONDITION_FAILED RC_REQUEST_ENTITY_TOO_LARGE RC_REQUEST_URI_TOO_LARGE RC_UNSUPPORTED_MEDIA_TYPE RC_INTERNAL_SERVER_ERROR RC_NOT_IMPLEMENTED RC_BAD_GATEWAY RC_SERVICE_UNAVAILABLE RC_GATEWAY_TIMEOUT RC_HTTP_VERSION_NOT_SUPPORTED The HTTP::Status classification functions are: =over 3 =item is_success($rc) True if response code indicated a successful request. =item is_error($rc) True if response code indicated that an error occurred. =back The module will also export the LWP::UserAgent object as C<$ua> if you ask for it explicitly. The user agent created by this module will identify itself as "LWP::Simple/#.##" and will initialize its proxy defaults from the environment (by calling $ua->env_proxy). =head1 CAVEAT Note that if you are using both LWP::Simple and the very popular CGI.pm module, you may be importing a C function from each module, producing a warning like "Prototype mismatch: sub main::head ($) vs none". Get around this problem by just not importing LWP::Simple's C function, like so: use LWP::Simple qw(!head); use CGI qw(:standard); # then only CGI.pm defines a head() Then if you do need LWP::Simple's C function, you can just call it as C. =head1 SEE ALSO L, L, L, L, L, L Authen/Ntlm.pm000064400000012412147633770250007256 0ustar00package LWP::Authen::Ntlm; use strict; use vars qw/$VERSION/; $VERSION = "6.00"; use Authen::NTLM "1.02"; use MIME::Base64 "2.12"; sub authenticate { my($class, $ua, $proxy, $auth_param, $response, $request, $arg, $size) = @_; my($user, $pass) = $ua->get_basic_credentials($auth_param->{realm}, $request->uri, $proxy); unless(defined $user and defined $pass) { return $response; } if (!$ua->conn_cache()) { warn "The keep_alive option must be enabled for NTLM authentication to work. NTLM authentication aborted.\n"; return $response; } my($domain, $username) = split(/\\/, $user); ntlm_domain($domain); ntlm_user($username); ntlm_password($pass); my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization"; # my ($challenge) = $response->header('WWW-Authenticate'); my $challenge; foreach ($response->header('WWW-Authenticate')) { last if /^NTLM/ && ($challenge=$_); } if ($challenge eq 'NTLM') { # First phase, send handshake my $auth_value = "NTLM " . ntlm(); ntlm_reset(); # Need to check this isn't a repeated fail! my $r = $response; my $retry_count = 0; while ($r) { my $auth = $r->request->header($auth_header); ++$retry_count if ($auth && $auth eq $auth_value); if ($retry_count > 2) { # here we know this failed before $response->header("Client-Warning" => "Credentials for '$user' failed before"); return $response; } $r = $r->previous; } my $referral = $request->clone; $referral->header($auth_header => $auth_value); return $ua->request($referral, $arg, $size, $response); } else { # Second phase, use the response challenge (unless non-401 code # was returned, in which case, we just send back the response # object, as is my $auth_value; if ($response->code ne '401') { return $response; } else { my $challenge; foreach ($response->header('WWW-Authenticate')) { last if /^NTLM/ && ($challenge=$_); } $challenge =~ s/^NTLM //; ntlm(); $auth_value = "NTLM " . ntlm($challenge); ntlm_reset(); } my $referral = $request->clone; $referral->header($auth_header => $auth_value); my $response2 = $ua->request($referral, $arg, $size, $response); return $response2; } } 1; =head1 NAME LWP::Authen::Ntlm - Library for enabling NTLM authentication (Microsoft) in LWP =head1 SYNOPSIS use LWP::UserAgent; use HTTP::Request::Common; my $url = 'http://www.company.com/protected_page.html'; # Set up the ntlm client and then the base64 encoded ntlm handshake message my $ua = LWP::UserAgent->new(keep_alive=>1); $ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword'); $request = GET $url; print "--Performing request now...-----------\n"; $response = $ua->request($request); print "--Done with request-------------------\n"; if ($response->is_success) {print "It worked!->" . $response->code . "\n"} else {print "It didn't work!->" . $response->code . "\n"} =head1 DESCRIPTION C allows LWP to authenticate against servers that are using the NTLM authentication scheme popularized by Microsoft. This type of authentication is common on intranets of Microsoft-centric organizations. The module takes advantage of the Authen::NTLM module by Mark Bush. Since there is also another Authen::NTLM module available from CPAN by Yee Man Chan with an entirely different interface, it is necessary to ensure that you have the correct NTLM module. In addition, there have been problems with incompatibilities between different versions of Mime::Base64, which Bush's Authen::NTLM makes use of. Therefore, it is necessary to ensure that your Mime::Base64 module supports exporting of the encode_base64 and decode_base64 functions. =head1 USAGE The module is used indirectly through LWP, rather than including it directly in your code. The LWP system will invoke the NTLM authentication when it encounters the authentication scheme while attempting to retrieve a URL from a server. In order for the NTLM authentication to work, you must have a few things set up in your code prior to attempting to retrieve the URL: =over 4 =item * Enable persistent HTTP connections To do this, pass the "keep_alive=>1" option to the LWP::UserAgent when creating it, like this: my $ua = LWP::UserAgent->new(keep_alive=>1); =item * Set the credentials on the UserAgent object The credentials must be set like this: $ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword'); Note that you cannot use the HTTP::Request object's authorization_basic() method to set the credentials. Note, too, that the 'www.company.com:80' portion only sets credentials on the specified port AND it is case-sensitive (this is due to the way LWP is coded, and has nothing to do with LWP::Authen::Ntlm) =back =head1 AVAILABILITY General queries regarding LWP should be made to the LWP Mailing List. Questions specific to LWP::Authen::Ntlm can be forwarded to jtillman@bigfoot.com =head1 COPYRIGHT Copyright (c) 2002 James Tillman. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO L, L, L. Authen/Digest.pm000064400000003414147633770250007565 0ustar00package LWP::Authen::Digest; use strict; use base 'LWP::Authen::Basic'; require Digest::MD5; sub auth_header { my($class, $user, $pass, $request, $ua, $h) = @_; my $auth_param = $h->{auth_param}; my $nc = sprintf "%08X", ++$ua->{authen_md5_nonce_count}{$auth_param->{nonce}}; my $cnonce = sprintf "%8x", time; my $uri = $request->uri->path_query; $uri = "/" unless length $uri; my $md5 = Digest::MD5->new; my(@digest); $md5->add(join(":", $user, $auth_param->{realm}, $pass)); push(@digest, $md5->hexdigest); $md5->reset; push(@digest, $auth_param->{nonce}); if ($auth_param->{qop}) { push(@digest, $nc, $cnonce, ($auth_param->{qop} =~ m|^auth[,;]auth-int$|) ? 'auth' : $auth_param->{qop}); } $md5->add(join(":", $request->method, $uri)); push(@digest, $md5->hexdigest); $md5->reset; $md5->add(join(":", @digest)); my($digest) = $md5->hexdigest; $md5->reset; my %resp = map { $_ => $auth_param->{$_} } qw(realm nonce opaque); @resp{qw(username uri response algorithm)} = ($user, $uri, $digest, "MD5"); if (($auth_param->{qop} || "") =~ m|^auth([,;]auth-int)?$|) { @resp{qw(qop cnonce nc)} = ("auth", $cnonce, $nc); } my(@order) = qw(username realm qop algorithm uri nonce nc cnonce response); if($request->method =~ /^(?:POST|PUT)$/) { $md5->add($request->content); my $content = $md5->hexdigest; $md5->reset; $md5->add(join(":", @digest[0..1], $content)); $md5->reset; $resp{"message-digest"} = $md5->hexdigest; push(@order, "message-digest"); } push(@order, "opaque"); my @pairs; for (@order) { next unless defined $resp{$_}; push(@pairs, "$_=" . qq("$resp{$_}")); } my $auth_value = "Digest " . join(", ", @pairs); return $auth_value; } 1; Authen/Basic.pm000064400000004005147633770250007364 0ustar00package LWP::Authen::Basic; use strict; require MIME::Base64; sub auth_header { my($class, $user, $pass) = @_; return "Basic " . MIME::Base64::encode("$user:$pass", ""); } sub authenticate { my($class, $ua, $proxy, $auth_param, $response, $request, $arg, $size) = @_; my $realm = $auth_param->{realm} || ""; my $url = $proxy ? $request->{proxy} : $request->uri_canonical; return $response unless $url; my $host_port = $url->host_port; my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization"; my @m = $proxy ? (m_proxy => $url) : (m_host_port => $host_port); push(@m, realm => $realm); my $h = $ua->get_my_handler("request_prepare", @m, sub { $_[0]{callback} = sub { my($req, $ua, $h) = @_; my($user, $pass) = $ua->credentials($host_port, $h->{realm}); if (defined $user) { my $auth_value = $class->auth_header($user, $pass, $req, $ua, $h); $req->header($auth_header => $auth_value); } }; }); $h->{auth_param} = $auth_param; if (!$proxy && !$request->header($auth_header) && $ua->credentials($host_port, $realm)) { # we can make sure this handler applies and retry add_path($h, $url->path); return $ua->request($request->clone, $arg, $size, $response); } my($user, $pass) = $ua->get_basic_credentials($realm, $url, $proxy); unless (defined $user and defined $pass) { $ua->set_my_handler("request_prepare", undef, @m); # delete handler return $response; } # check that the password has changed my ($olduser, $oldpass) = $ua->credentials($host_port, $realm); return $response if (defined $olduser and defined $oldpass and $user eq $olduser and $pass eq $oldpass); $ua->credentials($host_port, $realm, $user, $pass); add_path($h, $url->path) unless $proxy; return $ua->request($request->clone, $arg, $size, $response); } sub add_path { my($h, $path) = @_; $path =~ s,[^/]+\z,,; push(@{$h->{m_path_prefix}}, $path); } 1; Protocol/file.pm000064400000007371147633770250007650 0ustar00package LWP::Protocol::file; require LWP::Protocol; @ISA = qw(LWP::Protocol); use strict; require LWP::MediaTypes; require HTTP::Request; require HTTP::Response; require HTTP::Status; require HTTP::Date; sub request { my($self, $request, $proxy, $arg, $size) = @_; $size = 4096 unless defined $size and $size > 0; # check proxy if (defined $proxy) { return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, 'You can not proxy through the filesystem'); } # check method my $method = $request->method; unless ($method eq 'GET' || $method eq 'HEAD') { return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, 'Library does not allow method ' . "$method for 'file:' URLs"); } # check url my $url = $request->uri; my $scheme = $url->scheme; if ($scheme ne 'file') { return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "LWP::Protocol::file::request called for '$scheme'"); } # URL OK, look at file my $path = $url->file; # test file exists and is readable unless (-e $path) { return HTTP::Response->new( &HTTP::Status::RC_NOT_FOUND, "File `$path' does not exist"); } unless (-r _) { return HTTP::Response->new( &HTTP::Status::RC_FORBIDDEN, 'User does not have read permission'); } # looks like file exists my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$filesize, $atime,$mtime,$ctime,$blksize,$blocks) = stat(_); # XXX should check Accept headers? # check if-modified-since my $ims = $request->header('If-Modified-Since'); if (defined $ims) { my $time = HTTP::Date::str2time($ims); if (defined $time and $time >= $mtime) { return HTTP::Response->new( &HTTP::Status::RC_NOT_MODIFIED, "$method $path"); } } # Ok, should be an OK response by now... my $response = HTTP::Response->new( &HTTP::Status::RC_OK ); # fill in response headers $response->header('Last-Modified', HTTP::Date::time2str($mtime)); if (-d _) { # If the path is a directory, process it # generate the HTML for directory opendir(D, $path) or return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "Cannot read directory '$path': $!"); my(@files) = sort readdir(D); closedir(D); # Make directory listing require URI::Escape; require HTML::Entities; my $pathe = $path . ( $^O eq 'MacOS' ? ':' : '/'); for (@files) { my $furl = URI::Escape::uri_escape($_); if ( -d "$pathe$_" ) { $furl .= '/'; $_ .= '/'; } my $desc = HTML::Entities::encode($_); $_ = qq{
  • $desc}; } # Ensure that the base URL is "/" terminated my $base = $url->clone; unless ($base->path =~ m|/$|) { $base->path($base->path . "/"); } my $html = join("\n", "\n", "Directory $path", "", "\n", "

    Directory listing of $path

    ", "
      ", @files, "
    ", "\n\n"); $response->header('Content-Type', 'text/html'); $response->header('Content-Length', length $html); $html = "" if $method eq "HEAD"; return $self->collect_once($arg, $response, $html); } # path is a regular file $response->header('Content-Length', $filesize); LWP::MediaTypes::guess_media_type($path, $response); # read the file if ($method ne "HEAD") { open(F, $path) or return new HTTP::Response(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, "Cannot read file '$path': $!"); binmode(F); $response = $self->collect($arg, $response, sub { my $content = ""; my $bytes = sysread(F, $content, $size); return \$content if $bytes > 0; return \ ""; }); close(F); } $response; } 1; Protocol/http.pm000064400000031034147633770250007701 0ustar00package LWP::Protocol::http; use strict; require HTTP::Response; require HTTP::Status; require Net::HTTP; use vars qw(@ISA @EXTRA_SOCK_OPTS); require LWP::Protocol; @ISA = qw(LWP::Protocol); my $CRLF = "\015\012"; sub _new_socket { my($self, $host, $port, $timeout) = @_; my $conn_cache = $self->{ua}{conn_cache}; if ($conn_cache) { if (my $sock = $conn_cache->withdraw($self->socket_type, "$host:$port")) { return $sock if $sock && !$sock->can_read(0); # if the socket is readable, then either the peer has closed the # connection or there are some garbage bytes on it. In either # case we abandon it. $sock->close; } } local($^W) = 0; # IO::Socket::INET can be noisy my $sock = $self->socket_class->new(PeerAddr => $host, PeerPort => $port, LocalAddr => $self->{ua}{local_address}, Proto => 'tcp', Timeout => $timeout, KeepAlive => !!$conn_cache, SendTE => 1, $self->_extra_sock_opts($host, $port), ); unless ($sock) { # IO::Socket::INET leaves additional error messages in $@ my $status = "Can't connect to $host:$port"; if ($@ =~ /\bconnect: (.*)/ || $@ =~ /\b(Bad hostname)\b/ || $@ =~ /\b(certificate verify failed)\b/ || $@ =~ /\b(Crypt-SSLeay can't verify hostnames)\b/ ) { $status .= " ($1)"; } die "$status\n\n$@"; } # perl 5.005's IO::Socket does not have the blocking method. eval { $sock->blocking(0); }; $sock; } sub socket_type { return "http"; } sub socket_class { my $self = shift; (ref($self) || $self) . "::Socket"; } sub _extra_sock_opts # to be overridden by subclass { return @EXTRA_SOCK_OPTS; } sub _check_sock { #my($self, $req, $sock) = @_; } sub _get_sock_info { my($self, $res, $sock) = @_; if (defined(my $peerhost = $sock->peerhost)) { $res->header("Client-Peer" => "$peerhost:" . $sock->peerport); } } sub _fixup_header { my($self, $h, $url, $proxy) = @_; # Extract 'Host' header my $hhost = $url->authority; if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@" # add authorization header if we need them. HTTP URLs do # not really support specification of user and password, but # we allow it. if (defined($1) && not $h->header('Authorization')) { require URI::Escape; $h->authorization_basic(map URI::Escape::uri_unescape($_), split(":", $1, 2)); } } $h->init_header('Host' => $hhost); if ($proxy) { # Check the proxy URI's userinfo() for proxy credentials # export http_proxy="http://proxyuser:proxypass@proxyhost:port" my $p_auth = $proxy->userinfo(); if(defined $p_auth) { require URI::Escape; $h->proxy_authorization_basic(map URI::Escape::uri_unescape($_), split(":", $p_auth, 2)) } } } sub hlist_remove { my($hlist, $k) = @_; $k = lc $k; for (my $i = @$hlist - 2; $i >= 0; $i -= 2) { next unless lc($hlist->[$i]) eq $k; splice(@$hlist, $i, 2); } } sub request { my($self, $request, $proxy, $arg, $size, $timeout) = @_; $size ||= 4096; # check method my $method = $request->method; unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, 'Library does not allow method ' . "$method for 'http:' URLs"); } my $url = $request->uri; my($host, $port, $fullpath); # Check if we're proxy'ing if (defined $proxy) { # $proxy is an URL to an HTTP server which will proxy this request $host = $proxy->host; $port = $proxy->port; $fullpath = $method eq "CONNECT" ? ($url->host . ":" . $url->port) : $url->as_string; } else { $host = $url->host; $port = $url->port; $fullpath = $url->path_query; $fullpath = "/$fullpath" unless $fullpath =~ m,^/,; } # connect to remote site my $socket = $self->_new_socket($host, $port, $timeout); my $http_version = ""; if (my $proto = $request->protocol) { if ($proto =~ /^(?:HTTP\/)?(1.\d+)$/) { $http_version = $1; $socket->http_version($http_version); $socket->send_te(0) if $http_version eq "1.0"; } } $self->_check_sock($request, $socket); my @h; my $request_headers = $request->headers->clone; $self->_fixup_header($request_headers, $url, $proxy); $request_headers->scan(sub { my($k, $v) = @_; $k =~ s/^://; $v =~ s/\n/ /g; push(@h, $k, $v); }); my $content_ref = $request->content_ref; $content_ref = $$content_ref if ref($$content_ref); my $chunked; my $has_content; if (ref($content_ref) eq 'CODE') { my $clen = $request_headers->header('Content-Length'); $has_content++ if $clen; unless (defined $clen) { push(@h, "Transfer-Encoding" => "chunked"); $has_content++; $chunked++; } } else { # Set (or override) Content-Length header my $clen = $request_headers->header('Content-Length'); if (defined($$content_ref) && length($$content_ref)) { $has_content = length($$content_ref); if (!defined($clen) || $clen ne $has_content) { if (defined $clen) { warn "Content-Length header value was wrong, fixed"; hlist_remove(\@h, 'Content-Length'); } push(@h, 'Content-Length' => $has_content); } } elsif ($clen) { warn "Content-Length set when there is no content, fixed"; hlist_remove(\@h, 'Content-Length'); } } my $write_wait = 0; $write_wait = 2 if ($request_headers->header("Expect") || "") =~ /100-continue/; my $req_buf = $socket->format_request($method, $fullpath, @h); #print "------\n$req_buf\n------\n"; if (!$has_content || $write_wait || $has_content > 8*1024) { WRITE: { # Since this just writes out the header block it should almost # always succeed to send the whole buffer in a single write call. my $n = $socket->syswrite($req_buf, length($req_buf)); unless (defined $n) { redo WRITE if $!{EINTR}; if ($!{EAGAIN}) { select(undef, undef, undef, 0.1); redo WRITE; } die "write failed: $!"; } if ($n) { substr($req_buf, 0, $n, ""); } else { select(undef, undef, undef, 0.5); } redo WRITE if length $req_buf; } } my($code, $mess, @junk); my $drop_connection; if ($has_content) { my $eof; my $wbuf; my $woffset = 0; INITIAL_READ: if ($write_wait) { # skip filling $wbuf when waiting for 100-continue # because if the response is a redirect or auth required # the request will be cloned and there is no way # to reset the input stream # return here via the label after the 100-continue is read } elsif (ref($content_ref) eq 'CODE') { my $buf = &$content_ref(); $buf = "" unless defined($buf); $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF if $chunked; substr($buf, 0, 0) = $req_buf if $req_buf; $wbuf = \$buf; } else { if ($req_buf) { my $buf = $req_buf . $$content_ref; $wbuf = \$buf; } else { $wbuf = $content_ref; } $eof = 1; } my $fbits = ''; vec($fbits, fileno($socket), 1) = 1; WRITE: while ($write_wait || $woffset < length($$wbuf)) { my $sel_timeout = $timeout; if ($write_wait) { $sel_timeout = $write_wait if $write_wait < $sel_timeout; } my $time_before; $time_before = time if $sel_timeout; my $rbits = $fbits; my $wbits = $write_wait ? undef : $fbits; my $sel_timeout_before = $sel_timeout; SELECT: { my $nfound = select($rbits, $wbits, undef, $sel_timeout); if ($nfound < 0) { if ($!{EINTR} || $!{EAGAIN}) { if ($time_before) { $sel_timeout = $sel_timeout_before - (time - $time_before); $sel_timeout = 0 if $sel_timeout < 0; } redo SELECT; } die "select failed: $!"; } } if ($write_wait) { $write_wait -= time - $time_before; $write_wait = 0 if $write_wait < 0; } if (defined($rbits) && $rbits =~ /[^\0]/) { # readable my $buf = $socket->_rbuf; my $n = $socket->sysread($buf, 1024, length($buf)); unless (defined $n) { die "read failed: $!" unless $!{EINTR} || $!{EAGAIN}; # if we get here the rest of the block will do nothing # and we will retry the read on the next round } elsif ($n == 0) { # the server closed the connection before we finished # writing all the request content. No need to write any more. $drop_connection++; last WRITE; } $socket->_rbuf($buf); if (!$code && $buf =~ /\015?\012\015?\012/) { # a whole response header is present, so we can read it without blocking ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk, ); if ($code eq "100") { $write_wait = 0; undef($code); goto INITIAL_READ; } else { $drop_connection++; last WRITE; # XXX should perhaps try to abort write in a nice way too } } } if (defined($wbits) && $wbits =~ /[^\0]/) { my $n = $socket->syswrite($$wbuf, length($$wbuf), $woffset); unless (defined $n) { die "write failed: $!" unless $!{EINTR} || $!{EAGAIN}; $n = 0; # will retry write on the next round } elsif ($n == 0) { die "write failed: no bytes written"; } $woffset += $n; if (!$eof && $woffset >= length($$wbuf)) { # need to refill buffer from $content_ref code my $buf = &$content_ref(); $buf = "" unless defined($buf); $eof++ unless length($buf); $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF if $chunked; $wbuf = \$buf; $woffset = 0; } } } # WRITE } ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk) unless $code; ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk) if $code eq "100"; my $response = HTTP::Response->new($code, $mess); my $peer_http_version = $socket->peer_http_version; $response->protocol("HTTP/$peer_http_version"); { local $HTTP::Headers::TRANSLATE_UNDERSCORE; $response->push_header(@h); } $response->push_header("Client-Junk" => \@junk) if @junk; $response->request($request); $self->_get_sock_info($response, $socket); if ($method eq "CONNECT") { $response->{client_socket} = $socket; # so it can be picked up return $response; } if (my @te = $response->remove_header('Transfer-Encoding')) { $response->push_header('Client-Transfer-Encoding', \@te); } $response->push_header('Client-Response-Num', scalar $socket->increment_response_count); my $complete; $response = $self->collect($arg, $response, sub { my $buf = ""; #prevent use of uninitialized value in SSLeay.xs my $n; READ: { $n = $socket->read_entity_body($buf, $size); unless (defined $n) { redo READ if $!{EINTR} || $!{EAGAIN}; die "read failed: $!"; } redo READ if $n == -1; } $complete++ if !$n; return \$buf; } ); $drop_connection++ unless $complete; @h = $socket->get_trailers; if (@h) { local $HTTP::Headers::TRANSLATE_UNDERSCORE; $response->push_header(@h); } # keep-alive support unless ($drop_connection) { if (my $conn_cache = $self->{ua}{conn_cache}) { my %connection = map { (lc($_) => 1) } split(/\s*,\s*/, ($response->header("Connection") || "")); if (($peer_http_version eq "1.1" && !$connection{close}) || $connection{"keep-alive"}) { $conn_cache->deposit($self->socket_type, "$host:$port", $socket); } } } $response; } #----------------------------------------------------------- package LWP::Protocol::http::SocketMethods; sub ping { my $self = shift; !$self->can_read(0); } sub increment_response_count { my $self = shift; return ++${*$self}{'myhttp_response_count'}; } #----------------------------------------------------------- package LWP::Protocol::http::Socket; use vars qw(@ISA); @ISA = qw(LWP::Protocol::http::SocketMethods Net::HTTP); 1; Protocol/nogo.pm000064400000001166147633770250007667 0ustar00package LWP::Protocol::nogo; # If you want to disable access to a particular scheme, use this # class and then call # LWP::Protocol::implementor(that_scheme, 'LWP::Protocol::nogo'); # For then on, attempts to access URLs with that scheme will generate # a 500 error. use strict; use vars qw(@ISA); require HTTP::Response; require HTTP::Status; require LWP::Protocol; @ISA = qw(LWP::Protocol); sub request { my($self, $request) = @_; my $scheme = $request->uri->scheme; return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "Access to \'$scheme\' URIs has been disabled" ); } 1; Protocol/mailto.pm000064400000010514147633770250010207 0ustar00package LWP::Protocol::mailto; # This module implements the mailto protocol. It is just a simple # frontend to the Unix sendmail program except on MacOS, where it uses # Mail::Internet. require LWP::Protocol; require HTTP::Request; require HTTP::Response; require HTTP::Status; use Carp; use strict; use vars qw(@ISA $SENDMAIL); @ISA = qw(LWP::Protocol); unless ($SENDMAIL = $ENV{SENDMAIL}) { for my $sm (qw(/usr/sbin/sendmail /usr/lib/sendmail /usr/ucblib/sendmail )) { if (-x $sm) { $SENDMAIL = $sm; last; } } die "Can't find the 'sendmail' program" unless $SENDMAIL; } sub request { my($self, $request, $proxy, $arg, $size) = @_; my ($mail, $addr) if $^O eq "MacOS"; my @text = () if $^O eq "MacOS"; # check proxy if (defined $proxy) { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'You can not proxy with mail'); } # check method my $method = $request->method; if ($method ne 'POST') { return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, 'Library does not allow method ' . "$method for 'mailto:' URLs"); } # check url my $url = $request->uri; my $scheme = $url->scheme; if ($scheme ne 'mailto') { return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "LWP::Protocol::mailto::request called for '$scheme'"); } if ($^O eq "MacOS") { eval { require Mail::Internet; }; if($@) { return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "You don't have MailTools installed"); } unless ($ENV{SMTPHOSTS}) { return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "You don't have SMTPHOSTS defined"); } } else { unless (-x $SENDMAIL) { return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "You don't have $SENDMAIL"); } } if ($^O eq "MacOS") { $mail = Mail::Internet->new or return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "Can't get a Mail::Internet object"); } else { open(SENDMAIL, "| $SENDMAIL -oi -t") or return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR, "Can't run $SENDMAIL: $!"); } if ($^O eq "MacOS") { $addr = $url->encoded822addr; } else { $request = $request->clone; # we modify a copy my @h = $url->headers; # URL headers override those in the request while (@h) { my $k = shift @h; my $v = shift @h; next unless defined $v; if (lc($k) eq "body") { $request->content($v); } else { $request->push_header($k => $v); } } } if ($^O eq "MacOS") { $mail->add(To => $addr); $mail->add(split(/[:\n]/,$request->headers_as_string)); } else { print SENDMAIL $request->headers_as_string; print SENDMAIL "\n"; } my $content = $request->content; if (defined $content) { my $contRef = ref($content) ? $content : \$content; if (ref($contRef) eq 'SCALAR') { if ($^O eq "MacOS") { @text = split("\n",$$contRef); foreach (@text) { $_ .= "\n"; } } else { print SENDMAIL $$contRef; } } elsif (ref($contRef) eq 'CODE') { # Callback provides data my $d; if ($^O eq "MacOS") { my $stuff = ""; while (length($d = &$contRef)) { $stuff .= $d; } @text = split("\n",$stuff); foreach (@text) { $_ .= "\n"; } } else { print SENDMAIL $d; } } } if ($^O eq "MacOS") { $mail->body(\@text); unless ($mail->smtpsend) { return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, "Mail::Internet->smtpsend unable to send message to <$addr>"); } } else { unless (close(SENDMAIL)) { my $err = $! ? "$!" : "Exit status $?"; return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, "$SENDMAIL: $err"); } } my $response = HTTP::Response->new(&HTTP::Status::RC_ACCEPTED, "Mail accepted"); $response->header('Content-Type', 'text/plain'); if ($^O eq "MacOS") { $response->header('Server' => "Mail::Internet $Mail::Internet::VERSION"); $response->content("Message sent to <$addr>\n"); } else { $response->header('Server' => $SENDMAIL); my $to = $request->header("To"); $response->content("Message sent to <$to>\n"); } return $response; } 1; Protocol/GHTTP.pm000064400000003346147633770250007615 0ustar00package LWP::Protocol::GHTTP; # You can tell LWP to use this module for 'http' requests by running # code like this before you make requests: # # require LWP::Protocol::GHTTP; # LWP::Protocol::implementor('http', 'LWP::Protocol::GHTTP'); # use strict; use vars qw(@ISA); require LWP::Protocol; @ISA=qw(LWP::Protocol); require HTTP::Response; require HTTP::Status; use HTTP::GHTTP qw(METHOD_GET METHOD_HEAD METHOD_POST); my %METHOD = ( GET => METHOD_GET, HEAD => METHOD_HEAD, POST => METHOD_POST, ); sub request { my($self, $request, $proxy, $arg, $size, $timeout) = @_; my $method = $request->method; unless (exists $METHOD{$method}) { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, "Bad method '$method'"); } my $r = HTTP::GHTTP->new($request->uri); # XXX what headers for repeated headers here? $request->headers->scan(sub { $r->set_header(@_)}); $r->set_type($METHOD{$method}); # XXX should also deal with subroutine content. my $cref = $request->content_ref; $r->set_body($$cref) if length($$cref); # XXX is this right $r->set_proxy($proxy->as_string) if $proxy; $r->process_request; my $response = HTTP::Response->new($r->get_status); # XXX How can get the headers out of $r?? This way is too stupid. my @headers; eval { # Wrapped in eval because this method is not always available @headers = $r->get_headers; }; @headers = qw(Date Connection Server Content-type Accept-Ranges Server Content-Length Last-Modified ETag) if $@; for (@headers) { my $v = $r->get_header($_); $response->header($_ => $v) if defined $v; } return $self->collect_once($arg, $response, $r->get_body); } 1; Protocol/data.pm000064400000002344147633770250007635 0ustar00package LWP::Protocol::data; # Implements access to data:-URLs as specified in RFC 2397 use strict; use vars qw(@ISA); require HTTP::Response; require HTTP::Status; require LWP::Protocol; @ISA = qw(LWP::Protocol); use HTTP::Date qw(time2str); require LWP; # needs version number sub request { my($self, $request, $proxy, $arg, $size) = @_; # check proxy if (defined $proxy) { return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, 'You can not proxy with data'); } # check method my $method = $request->method; unless ($method eq 'GET' || $method eq 'HEAD') { return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST, 'Library does not allow method ' . "$method for 'data:' URLs"); } my $url = $request->uri; my $response = HTTP::Response->new( &HTTP::Status::RC_OK, "Document follows"); my $media_type = $url->media_type; my $data = $url->data; $response->header('Content-Type' => $media_type, 'Content-Length' => length($data), 'Date' => time2str(time), 'Server' => "libwww-perl-internal/$LWP::VERSION" ); $data = "" if $method eq "HEAD"; return $self->collect_once($arg, $response, $data); } 1; Protocol/loopback.pm000064400000001131147633770250010507 0ustar00package LWP::Protocol::loopback; use strict; use vars qw(@ISA); require HTTP::Response; require LWP::Protocol; @ISA = qw(LWP::Protocol); sub request { my($self, $request, $proxy, $arg, $size, $timeout) = @_; my $response = HTTP::Response->new(200, "OK"); $response->content_type("message/http; msgtype=request"); $response->header("Via", "loopback/1.0 $proxy") if $proxy; $response->header("X-Arg", $arg); $response->header("X-Read-Size", $size); $response->header("X-Timeout", $timeout); return $self->collect_once($arg, $response, $request->as_string); } 1; Protocol/cpan.pm000064400000002544147633770250007647 0ustar00package LWP::Protocol::cpan; use strict; use vars qw(@ISA); require LWP::Protocol; @ISA = qw(LWP::Protocol); require URI; require HTTP::Status; require HTTP::Response; our $CPAN; unless ($CPAN) { # Try to find local CPAN mirror via $CPAN::Config eval { require CPAN::Config; if($CPAN::Config) { my $urls = $CPAN::Config->{urllist}; if (ref($urls) eq "ARRAY") { my $file; for (@$urls) { if (/^file:/) { $file = $_; last; } } if ($file) { $CPAN = $file; } else { $CPAN = $urls->[0]; } } } }; $CPAN ||= "http://cpan.org/"; # last resort } # ensure that we don't chop of last part $CPAN .= "/" unless $CPAN =~ m,/$,; sub request { my($self, $request, $proxy, $arg, $size) = @_; # check proxy if (defined $proxy) { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'You can not proxy with cpan'); } # check method my $method = $request->method; unless ($method eq 'GET' || $method eq 'HEAD') { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'Library does not allow method ' . "$method for 'cpan:' URLs"); } my $path = $request->uri->path; $path =~ s,^/,,; my $response = HTTP::Response->new(&HTTP::Status::RC_FOUND); $response->header("Location" => URI->new_abs($path, $CPAN)); $response; } 1; Protocol/nntp.pm000064400000007441147633770250007706 0ustar00package LWP::Protocol::nntp; # Implementation of the Network News Transfer Protocol (RFC 977) require LWP::Protocol; @ISA = qw(LWP::Protocol); require HTTP::Response; require HTTP::Status; require Net::NNTP; use strict; sub request { my($self, $request, $proxy, $arg, $size, $timeout) = @_; $size = 4096 unless $size; # Check for proxy if (defined $proxy) { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'You can not proxy through NNTP'); } # Check that the scheme is as expected my $url = $request->uri; my $scheme = $url->scheme; unless ($scheme eq 'news' || $scheme eq 'nntp') { return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, "LWP::Protocol::nntp::request called for '$scheme'"); } # check for a valid method my $method = $request->method; unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'POST') { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'Library does not allow method ' . "$method for '$scheme:' URLs"); } # extract the identifier and check against posting to an article my $groupart = $url->_group; my $is_art = $groupart =~ /@/; if ($is_art && $method eq 'POST') { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, "Can't post to an article <$groupart>"); } my $nntp = Net::NNTP->new($url->host, #Port => 18574, Timeout => $timeout, #Debug => 1, ); die "Can't connect to nntp server" unless $nntp; # Check the initial welcome message from the NNTP server if ($nntp->status != 2) { return HTTP::Response->new(&HTTP::Status::RC_SERVICE_UNAVAILABLE, $nntp->message); } my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK"); my $mess = $nntp->message; # Try to extract server name from greeting message. # Don't know if this works well for a large class of servers, but # this works for our server. $mess =~ s/\s+ready\b.*//; $mess =~ s/^\S+\s+//; $response->header(Server => $mess); # First we handle posting of articles if ($method eq 'POST') { $nntp->quit; $nntp = undef; $response->code(&HTTP::Status::RC_NOT_IMPLEMENTED); $response->message("POST not implemented yet"); return $response; } # The method must be "GET" or "HEAD" by now if (!$is_art) { if (!$nntp->group($groupart)) { $response->code(&HTTP::Status::RC_NOT_FOUND); $response->message($nntp->message); } $nntp->quit; $nntp = undef; # HEAD: just check if the group exists if ($method eq 'GET' && $response->is_success) { $response->code(&HTTP::Status::RC_NOT_IMPLEMENTED); $response->message("GET newsgroup not implemented yet"); } return $response; } # Send command to server to retrieve an article (or just the headers) my $get = $method eq 'HEAD' ? "head" : "article"; my $art = $nntp->$get("<$groupart>"); unless ($art) { $nntp->quit; $nntp = undef; $response->code(&HTTP::Status::RC_NOT_FOUND); $response->message($nntp->message); return $response; } # Parse headers my($key, $val); local $_; while ($_ = shift @$art) { if (/^\s+$/) { last; # end of headers } elsif (/^(\S+):\s*(.*)/) { $response->push_header($key, $val) if $key; ($key, $val) = ($1, $2); } elsif (/^\s+(.*)/) { next unless $key; $val .= $1; } else { unshift(@$art, $_); last; } } $response->push_header($key, $val) if $key; # Ensure that there is a Content-Type header $response->header("Content-Type", "text/plain") unless $response->header("Content-Type"); # Collect the body $response = $self->collect_once($arg, $response, join("", @$art)) if @$art; # Say goodbye to the server $nntp->quit; $nntp = undef; $response; } 1; Protocol/gopher.pm000064400000013167147633770250010215 0ustar00package LWP::Protocol::gopher; # Implementation of the gopher protocol (RFC 1436) # # This code is based on 'wwwgopher.pl,v 0.10 1994/10/17 18:12:34 shelden' # which in turn is a vastly modified version of Oscar's http'get() # dated 28/3/94 in # including contributions from Marc van Heyningen and Martijn Koster. use strict; use vars qw(@ISA); require HTTP::Response; require HTTP::Status; require IO::Socket; require IO::Select; require LWP::Protocol; @ISA = qw(LWP::Protocol); my %gopher2mimetype = ( '0' => 'text/plain', # 0 file '1' => 'text/html', # 1 menu # 2 CSO phone-book server # 3 Error '4' => 'application/mac-binhex40', # 4 BinHexed Macintosh file '5' => 'application/zip', # 5 DOS binary archive of some sort '6' => 'application/octet-stream', # 6 UNIX uuencoded file. '7' => 'text/html', # 7 Index-Search server # 8 telnet session '9' => 'application/octet-stream', # 9 binary file 'h' => 'text/html', # html 'g' => 'image/gif', # gif 'I' => 'image/*', # some kind of image ); my %gopher2encoding = ( '6' => 'x_uuencode', # 6 UNIX uuencoded file. ); sub request { my($self, $request, $proxy, $arg, $size, $timeout) = @_; $size = 4096 unless $size; # check proxy if (defined $proxy) { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'You can not proxy through the gopher'); } my $url = $request->uri; die "bad scheme" if $url->scheme ne 'gopher'; my $method = $request->method; unless ($method eq 'GET' || $method eq 'HEAD') { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'Library does not allow method ' . "$method for 'gopher:' URLs"); } my $gophertype = $url->gopher_type; unless (exists $gopher2mimetype{$gophertype}) { return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED, 'Library does not support gophertype ' . $gophertype); } my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK"); $response->header('Content-type' => $gopher2mimetype{$gophertype} || 'text/plain'); $response->header('Content-Encoding' => $gopher2encoding{$gophertype}) if exists $gopher2encoding{$gophertype}; if ($method eq 'HEAD') { # XXX: don't even try it so we set this header $response->header('Client-Warning' => 'Client answer only'); return $response; } if ($gophertype eq '7' && ! $url->search) { # the url is the prompt for a gopher search; supply boiler-plate return $self->collect_once($arg, $response, <<"EOT"); Gopher Index

    $url
    Gopher Search

    This is a searchable Gopher index. Use the search function of your browser to enter search terms. EOT } my $host = $url->host; my $port = $url->port; my $requestLine = ""; my $selector = $url->selector; if (defined $selector) { $requestLine .= $selector; my $search = $url->search; if (defined $search) { $requestLine .= "\t$search"; my $string = $url->string; if (defined $string) { $requestLine .= "\t$string"; } } } $requestLine .= "\015\012"; # potential request headers are just ignored # Ok, lets make the request my $socket = IO::Socket::INET->new(PeerAddr => $host, PeerPort => $port, LocalAddr => $self->{ua}{local_address}, Proto => 'tcp', Timeout => $timeout); die "Can't connect to $host:$port" unless $socket; my $sel = IO::Select->new($socket); { die "write timeout" if $timeout && !$sel->can_write($timeout); my $n = syswrite($socket, $requestLine, length($requestLine)); die $! unless defined($n); die "short write" if $n != length($requestLine); } my $user_arg = $arg; # must handle menus in a special way since they are to be # converted to HTML. Undefing $arg ensures that the user does # not see the data before we get a change to convert it. $arg = undef if $gophertype eq '1' || $gophertype eq '7'; # collect response my $buf = ''; $response = $self->collect($arg, $response, sub { die "read timeout" if $timeout && !$sel->can_read($timeout); my $n = sysread($socket, $buf, $size); die $! unless defined($n); return \$buf; } ); # Convert menu to HTML and return data to user. if ($gophertype eq '1' || $gophertype eq '7') { my $content = menu2html($response->content); if (defined $user_arg) { $response = $self->collect_once($user_arg, $response, $content); } else { $response->content($content); } } $response; } sub gopher2url { my($gophertype, $path, $host, $port) = @_; my $url; if ($gophertype eq '8' || $gophertype eq 'T') { # telnet session $url = $HTTP::URI_CLASS->new($gophertype eq '8' ? 'telnet:':'tn3270:'); $url->user($path) if defined $path; } else { $path = URI::Escape::uri_escape($path); $url = $HTTP::URI_CLASS->new("gopher:/$gophertype$path"); } $url->host($host); $url->port($port); $url; } sub menu2html { my($menu) = @_; $menu =~ s/\015//g; # remove carriage return my $tmp = <<"EOT"; Gopher menu

    Gopher menu

    EOT for (split("\n", $menu)) { last if /^\./; my($pretty, $path, $host, $port) = split("\t"); $pretty =~ s/^(.)//; my $type = $1; my $url = gopher2url($type, $path, $host, $port)->as_string; $tmp .= qq{$pretty
    \n}; } $tmp .= "\n\n"; $tmp; } 1; Protocol/ftp.pm000064400000040660147633770250007520 0ustar00package LWP::Protocol::ftp; # Implementation of the ftp protocol (RFC 959). We let the Net::FTP # package do all the dirty work. use Carp (); use HTTP::Status (); use HTTP::Negotiate (); use HTTP::Response (); use LWP::MediaTypes (); use File::Listing (); require LWP::Protocol; @ISA = qw(LWP::Protocol); use strict; eval { package LWP::Protocol::MyFTP; require Net::FTP; Net::FTP->require_version(2.00); use vars qw(@ISA); @ISA=qw(Net::FTP); sub new { my $class = shift; my $self = $class->SUPER::new(@_) || return undef; my $mess = $self->message; # welcome message $mess =~ s|\n.*||s; # only first line left $mess =~ s|\s*ready\.?$||; # Make the version number more HTTP like $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||; ${*$self}{myftp_server} = $mess; #$response->header("Server", $mess); $self; } sub http_server { my $self = shift; ${*$self}{myftp_server}; } sub home { my $self = shift; my $old = ${*$self}{myftp_home}; if (@_) { ${*$self}{myftp_home} = shift; } $old; } sub go_home { my $self = shift; $self->cwd(${*$self}{myftp_home}); } sub request_count { my $self = shift; ++${*$self}{myftp_reqcount}; } sub ping { my $self = shift; return $self->go_home; } }; my $init_failed = $@; sub _connect { my($self, $host, $port, $user, $account, $password, $timeout) = @_; my $key; my $conn_cache = $self->{ua}{conn_cache}; if ($conn_cache) { $key = "$host:$port:$user"; $key .= ":$account" if defined($account); if (my $ftp = $conn_cache->withdraw("ftp", $key)) { if ($ftp->ping) { # save it again $conn_cache->deposit("ftp", $key, $ftp); return $ftp; } } } # try to make a connection my $ftp = LWP::Protocol::MyFTP->new($host, Port => $port, Timeout => $timeout, LocalAddr => $self->{ua}{local_address}, ); # XXX Should be some what to pass on 'Passive' (header??) unless ($ftp) { $@ =~ s/^Net::FTP: //; return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@); } unless ($ftp->login($user, $password, $account)) { # Unauthorized. Let's fake a RC_UNAUTHORIZED response my $mess = scalar($ftp->message); $mess =~ s/\n$//; my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess); $res->header("Server", $ftp->http_server); $res->header("WWW-Authenticate", qq(Basic Realm="FTP login")); return $res; } my $home = $ftp->pwd; $ftp->home($home); $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache; return $ftp; } sub request { my($self, $request, $proxy, $arg, $size, $timeout) = @_; $size = 4096 unless $size; # check proxy if (defined $proxy) { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'You can not proxy through the ftp'); } my $url = $request->uri; if ($url->scheme ne 'ftp') { my $scheme = $url->scheme; return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, "LWP::Protocol::ftp::request called for '$scheme'"); } # check method my $method = $request->method; unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'Library does not allow method ' . "$method for 'ftp:' URLs"); } if ($init_failed) { return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $init_failed); } my $host = $url->host; my $port = $url->port; my $user = $url->user; my $password = $url->password; # If a basic autorization header is present than we prefer these over # the username/password specified in the URL. { my($u,$p) = $request->authorization_basic; if (defined $u) { $user = $u; $password = $p; } } # We allow the account to be specified in the "Account" header my $account = $request->header('Account'); my $ftp = $self->_connect($host, $port, $user, $account, $password, $timeout); return $ftp if ref($ftp) eq "HTTP::Response"; # ugh! # Create an initial response object my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK"); $response->header(Server => $ftp->http_server); $response->header('Client-Request-Num' => $ftp->request_count); $response->request($request); # Get & fix the path my @path = grep { length } $url->path_segments; my $remote_file = pop(@path); $remote_file = '' unless defined $remote_file; my $type; if (ref $remote_file) { my @params; ($remote_file, @params) = @$remote_file; for (@params) { $type = $_ if s/^type=//; } } if ($type && $type eq 'a') { $ftp->ascii; } else { $ftp->binary; } for (@path) { unless ($ftp->cwd($_)) { return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND, "Can't chdir to $_"); } } if ($method eq 'GET' || $method eq 'HEAD') { if (my $mod_time = $ftp->mdtm($remote_file)) { $response->last_modified($mod_time); if (my $ims = $request->if_modified_since) { if ($mod_time <= $ims) { $response->code(&HTTP::Status::RC_NOT_MODIFIED); $response->message("Not modified"); return $response; } } } # We'll use this later to abort the transfer if necessary. # if $max_size is defined, we need to abort early. Otherwise, it's # a normal transfer my $max_size = undef; # Set resume location, if the client requested it if ($request->header('Range') && $ftp->supported('REST')) { my $range_info = $request->header('Range'); # Change bytes=2772992-6781209 to just 2772992 my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)?/; if ( defined $start_byte && !defined $end_byte ) { # open range -- only the start is specified $ftp->restart( $start_byte ); # don't define $max_size, we don't want to abort early } elsif ( defined $start_byte && defined $end_byte && $start_byte >= 0 && $end_byte >= $start_byte ) { $ftp->restart( $start_byte ); $max_size = $end_byte - $start_byte; } else { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 'Incorrect syntax for Range request'); } } elsif ($request->header('Range') && !$ftp->supported('REST')) { return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED, "Server does not support resume."); } my $data; # the data handle if (length($remote_file) and $data = $ftp->retr($remote_file)) { my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file); $response->header('Content-Type', $type) if $type; for (@enc) { $response->push_header('Content-Encoding', $_); } my $mess = $ftp->message; if ($mess =~ /\((\d+)\s+bytes\)/) { $response->header('Content-Length', "$1"); } if ($method ne 'HEAD') { # Read data from server $response = $self->collect($arg, $response, sub { my $content = ''; my $result = $data->read($content, $size); # Stop early if we need to. if (defined $max_size) { # We need an interface to Net::FTP::dataconn for getting # the number of bytes already read my $bytes_received = $data->bytes_read(); # We were already over the limit. (Should only happen # once at the end.) if ($bytes_received - length($content) > $max_size) { $content = ''; } # We just went over the limit elsif ($bytes_received > $max_size) { # Trim content $content = substr($content, 0, $max_size - ($bytes_received - length($content)) ); } # We're under the limit else { } } return \$content; } ); } # abort is needed for HEAD, it's == close if the transfer has # already completed. unless ($data->abort) { # Something did not work too well. Note that we treat # responses to abort() with code 0 in case of HEAD as ok # (at least wu-ftpd 2.6.1(1) does that). if ($method ne 'HEAD' || $ftp->code != 0) { $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR); $response->message("FTP close response: " . $ftp->code . " " . $ftp->message); } } } elsif (!length($remote_file) || ( $ftp->code >= 400 && $ftp->code < 600 )) { # not a plain file, try to list instead if (length($remote_file) && !$ftp->cwd($remote_file)) { return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND, "File '$remote_file' not found"); } # It should now be safe to try to list the directory my @lsl = $ftp->dir; # Try to figure out if the user want us to convert the # directory listing to HTML. my @variants = ( ['html', 0.60, 'text/html' ], ['dir', 1.00, 'text/ftp-dir-listing' ] ); #$HTTP::Negotiate::DEBUG=1; my $prefer = HTTP::Negotiate::choose(\@variants, $request); my $content = ''; if (!defined($prefer)) { return HTTP::Response->new(&HTTP::Status::RC_NOT_ACCEPTABLE, "Neither HTML nor directory listing wanted"); } elsif ($prefer eq 'html') { $response->header('Content-Type' => 'text/html'); $content = "File Listing\n"; my $base = $request->uri->clone; my $path = $base->path; $base->path("$path/") unless $path =~ m|/$|; $content .= qq(\n\n); $content .= "\n
      \n"; for (File::Listing::parse_dir(\@lsl, 'GMT')) { my($name, $type, $size, $mtime, $mode) = @$_; $content .= qq(
    • $name); $content .= " $size bytes" if $type eq 'f'; $content .= "\n"; } $content .= "
    \n"; } else { $response->header('Content-Type', 'text/ftp-dir-listing'); $content = join("\n", @lsl, ''); } $response->header('Content-Length', length($content)); if ($method ne 'HEAD') { $response = $self->collect_once($arg, $response, $content); } } else { my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, "FTP return code " . $ftp->code); $res->content_type("text/plain"); $res->content($ftp->message); return $res; } } elsif ($method eq 'PUT') { # method must be PUT unless (length($remote_file)) { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, "Must have a file name to PUT to"); } my $data; if ($data = $ftp->stor($remote_file)) { my $content = $request->content; my $bytes = 0; if (defined $content) { if (ref($content) eq 'SCALAR') { $bytes = $data->write($$content, length($$content)); } elsif (ref($content) eq 'CODE') { my($buf, $n); while (length($buf = &$content)) { $n = $data->write($buf, length($buf)); last unless $n; $bytes += $n; } } elsif (!ref($content)) { if (defined $content && length($content)) { $bytes = $data->write($content, length($content)); } } else { die "Bad content"; } } $data->close; $response->code(&HTTP::Status::RC_CREATED); $response->header('Content-Type', 'text/plain'); $response->content("$bytes bytes stored as $remote_file on $host\n") } else { my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, "FTP return code " . $ftp->code); $res->content_type("text/plain"); $res->content($ftp->message); return $res; } } else { return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, "Illegal method $method"); } $response; } 1; __END__ # This is what RFC 1738 has to say about FTP access: # -------------------------------------------------- # # 3.2. FTP # # The FTP URL scheme is used to designate files and directories on # Internet hosts accessible using the FTP protocol (RFC959). # # A FTP URL follow the syntax described in Section 3.1. If : is # omitted, the port defaults to 21. # # 3.2.1. FTP Name and Password # # A user name and password may be supplied; they are used in the ftp # "USER" and "PASS" commands after first making the connection to the # FTP server. If no user name or password is supplied and one is # requested by the FTP server, the conventions for "anonymous" FTP are # to be used, as follows: # # The user name "anonymous" is supplied. # # The password is supplied as the Internet e-mail address # of the end user accessing the resource. # # If the URL supplies a user name but no password, and the remote # server requests a password, the program interpreting the FTP URL # should request one from the user. # # 3.2.2. FTP url-path # # The url-path of a FTP URL has the following syntax: # # //...//;type= # # Where through and are (possibly encoded) strings # and is one of the characters "a", "i", or "d". The part # ";type=" may be omitted. The and parts may be # empty. The whole url-path may be omitted, including the "/" # delimiting it from the prefix containing user, password, host, and # port. # # The url-path is interpreted as a series of FTP commands as follows: # # Each of the elements is to be supplied, sequentially, as the # argument to a CWD (change working directory) command. # # If the typecode is "d", perform a NLST (name list) command with # as the argument, and interpret the results as a file # directory listing. # # Otherwise, perform a TYPE command with as the argument, # and then access the file whose name is (for example, using # the RETR command.) # # Within a name or CWD component, the characters "/" and ";" are # reserved and must be encoded. The components are decoded prior to # their use in the FTP protocol. In particular, if the appropriate FTP # sequence to access a particular file requires supplying a string # containing a "/" as an argument to a CWD or RETR command, it is # necessary to encode each "/". # # For example, the URL is # interpreted by FTP-ing to "host.dom", logging in as "myname" # (prompting for a password if it is asked for), and then executing # "CWD /etc" and then "RETR motd". This has a different meaning from # which would "CWD etc" and then # "RETR motd"; the initial "CWD" might be executed relative to the # default directory for "myname". On the other hand, # , would "CWD " with a null # argument, then "CWD etc", and then "RETR motd". # # FTP URLs may also be used for other operations; for example, it is # possible to update a file on a remote file server, or infer # information about it from the directory listings. The mechanism for # doing so is not spelled out here. # # 3.2.3. FTP Typecode is Optional # # The entire ;type= part of a FTP URL is optional. If it is # omitted, the client program interpreting the URL must guess the # appropriate mode to use. In general, the data content type of a file # can only be guessed from the name, e.g., from the suffix of the name; # the appropriate type code to be used for transfer of the file can # then be deduced from the data content of the file. # # 3.2.4 Hierarchy # # For some file systems, the "/" used to denote the hierarchical # structure of the URL corresponds to the delimiter used to construct a # file name hierarchy, and thus, the filename will look similar to the # URL path. This does NOT mean that the URL is a Unix filename. # # 3.2.5. Optimization # # Clients accessing resources via FTP may employ additional heuristics # to optimize the interaction. For some FTP servers, for example, it # may be reasonable to keep the control connection open while accessing # multiple URLs from the same server. However, there is no common # hierarchical model to the FTP protocol, so if a directory change # command has been given, it is impossible in general to deduce what # sequence should be given to navigate to another directory for a # second retrieval, if the paths are different. The only reliable # algorithm is to disconnect and reestablish the control connection. Protocol.pm000064400000017352147633770250006731 0ustar00package LWP::Protocol; require LWP::MemberMixin; @ISA = qw(LWP::MemberMixin); $VERSION = "6.00"; use strict; use Carp (); use HTTP::Status (); use HTTP::Response; my %ImplementedBy = (); # scheme => classname sub new { my($class, $scheme, $ua) = @_; my $self = bless { scheme => $scheme, ua => $ua, # historical/redundant max_size => $ua->{max_size}, }, $class; $self; } sub create { my($scheme, $ua) = @_; my $impclass = LWP::Protocol::implementor($scheme) or Carp::croak("Protocol scheme '$scheme' is not supported"); # hand-off to scheme specific implementation sub-class my $protocol = $impclass->new($scheme, $ua); return $protocol; } sub implementor { my($scheme, $impclass) = @_; if ($impclass) { $ImplementedBy{$scheme} = $impclass; } my $ic = $ImplementedBy{$scheme}; return $ic if $ic; return '' unless $scheme =~ /^([.+\-\w]+)$/; # check valid URL schemes $scheme = $1; # untaint $scheme =~ s/[.+\-]/_/g; # make it a legal module name # scheme not yet known, look for a 'use'd implementation $ic = "LWP::Protocol::$scheme"; # default location $ic = "LWP::Protocol::nntp" if $scheme eq 'news'; #XXX ugly hack no strict 'refs'; # check we actually have one for the scheme: unless (@{"${ic}::ISA"}) { # try to autoload it eval "require $ic"; if ($@) { if ($@ =~ /Can't locate/) { #' #emacs get confused by ' $ic = ''; } else { die "$@\n"; } } } $ImplementedBy{$scheme} = $ic if $ic; $ic; } sub request { my($self, $request, $proxy, $arg, $size, $timeout) = @_; Carp::croak('LWP::Protocol::request() needs to be overridden in subclasses'); } # legacy sub timeout { shift->_elem('timeout', @_); } sub max_size { shift->_elem('max_size', @_); } sub collect { my ($self, $arg, $response, $collector) = @_; my $content; my($ua, $max_size) = @{$self}{qw(ua max_size)}; eval { local $\; # protect the print below from surprises if (!defined($arg) || !$response->is_success) { $response->{default_add_content} = 1; } elsif (!ref($arg) && length($arg)) { open(my $fh, ">", $arg) or die "Can't write to '$arg': $!"; binmode($fh); push(@{$response->{handlers}{response_data}}, { callback => sub { print $fh $_[3] or die "Can't write to '$arg': $!"; 1; }, }); push(@{$response->{handlers}{response_done}}, { callback => sub { close($fh) or die "Can't write to '$arg': $!"; undef($fh); }, }); } elsif (ref($arg) eq 'CODE') { push(@{$response->{handlers}{response_data}}, { callback => sub { &$arg($_[3], $_[0], $self); 1; }, }); } else { die "Unexpected collect argument '$arg'"; } $ua->run_handlers("response_header", $response); if (delete $response->{default_add_content}) { push(@{$response->{handlers}{response_data}}, { callback => sub { $_[0]->add_content($_[3]); 1; }, }); } my $content_size = 0; my $length = $response->content_length; my %skip_h; while ($content = &$collector, length $$content) { for my $h ($ua->handlers("response_data", $response)) { next if $skip_h{$h}; unless ($h->{callback}->($response, $ua, $h, $$content)) { # XXX remove from $response->{handlers}{response_data} if present $skip_h{$h}++; } } $content_size += length($$content); $ua->progress(($length ? ($content_size / $length) : "tick"), $response); if (defined($max_size) && $content_size > $max_size) { $response->push_header("Client-Aborted", "max_size"); last; } } }; my $err = $@; delete $response->{handlers}{response_data}; delete $response->{handlers} unless %{$response->{handlers}}; if ($err) { chomp($err); $response->push_header('X-Died' => $err); $response->push_header("Client-Aborted", "die"); return $response; } return $response; } sub collect_once { my($self, $arg, $response) = @_; my $content = \ $_[3]; my $first = 1; $self->collect($arg, $response, sub { return $content if $first--; return \ ""; }); } 1; __END__ =head1 NAME LWP::Protocol - Base class for LWP protocols =head1 SYNOPSIS package LWP::Protocol::foo; require LWP::Protocol; @ISA=qw(LWP::Protocol); =head1 DESCRIPTION This class is used a the base class for all protocol implementations supported by the LWP library. When creating an instance of this class using C, and you get an initialised subclass appropriate for that access method. In other words, the LWP::Protocol::create() function calls the constructor for one of its subclasses. All derived LWP::Protocol classes need to override the request() method which is used to service a request. The overridden method can make use of the collect() function to collect together chunks of data as it is received. The following methods and functions are provided: =over 4 =item $prot = LWP::Protocol->new() The LWP::Protocol constructor is inherited by subclasses. As this is a virtual base class this method should B be called directly. =item $prot = LWP::Protocol::create($scheme) Create an object of the class implementing the protocol to handle the given scheme. This is a function, not a method. It is more an object factory than a constructor. This is the function user agents should use to access protocols. =item $class = LWP::Protocol::implementor($scheme, [$class]) Get and/or set implementor class for a scheme. Returns '' if the specified scheme is not supported. =item $prot->request(...) $response = $protocol->request($request, $proxy, undef); $response = $protocol->request($request, $proxy, '/tmp/sss'); $response = $protocol->request($request, $proxy, \&callback, 1024); Dispatches a request over the protocol, and returns a response object. This method needs to be overridden in subclasses. Refer to L for description of the arguments. =item $prot->collect($arg, $response, $collector) Called to collect the content of a request, and process it appropriately into a scalar, file, or by calling a callback. If $arg is undefined, then the content is stored within the $response. If $arg is a simple scalar, then $arg is interpreted as a file name and the content is written to this file. If $arg is a reference to a routine, then content is passed to this routine. The $collector is a routine that will be called and which is responsible for returning pieces (as ref to scalar) of the content to process. The $collector signals EOF by returning a reference to an empty sting. The return value from collect() is the $response object reference. B We will only use the callback or file argument if $response->is_success(). This avoids sending content data for redirects and authentication responses to the callback which would be confusing. =item $prot->collect_once($arg, $response, $content) Can be called when the whole response content is available as $content. This will invoke collect() with a collector callback that returns a reference to $content the first time and an empty string the next. =back =head1 SEE ALSO Inspect the F and F files for examples of usage. =head1 COPYRIGHT Copyright 1995-2001 Gisle Aas. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. RobotUA.pm000064400000017041147633770250006436 0ustar00package LWP::RobotUA; require LWP::UserAgent; @ISA = qw(LWP::UserAgent); $VERSION = "6.03"; require WWW::RobotRules; require HTTP::Request; require HTTP::Response; use Carp (); use HTTP::Status (); use HTTP::Date qw(time2str); use strict; # # Additional attributes in addition to those found in LWP::UserAgent: # # $self->{'delay'} Required delay between request to the same # server in minutes. # # $self->{'rules'} A WWW::RobotRules object # sub new { my $class = shift; my %cnf; if (@_ < 4) { # legacy args @cnf{qw(agent from rules)} = @_; } else { %cnf = @_; } Carp::croak('LWP::RobotUA agent required') unless $cnf{agent}; Carp::croak('LWP::RobotUA from address required') unless $cnf{from} && $cnf{from} =~ m/\@/; my $delay = delete $cnf{delay} || 1; my $use_sleep = delete $cnf{use_sleep}; $use_sleep = 1 unless defined($use_sleep); my $rules = delete $cnf{rules}; my $self = LWP::UserAgent->new(%cnf); $self = bless $self, $class; $self->{'delay'} = $delay; # minutes $self->{'use_sleep'} = $use_sleep; if ($rules) { $rules->agent($cnf{agent}); $self->{'rules'} = $rules; } else { $self->{'rules'} = WWW::RobotRules->new($cnf{agent}); } $self; } sub delay { shift->_elem('delay', @_); } sub use_sleep { shift->_elem('use_sleep', @_); } sub agent { my $self = shift; my $old = $self->SUPER::agent(@_); if (@_) { # Changing our name means to start fresh $self->{'rules'}->agent($self->{'agent'}); } $old; } sub rules { my $self = shift; my $old = $self->_elem('rules', @_); $self->{'rules'}->agent($self->{'agent'}) if @_; $old; } sub no_visits { my($self, $netloc) = @_; $self->{'rules'}->no_visits($netloc) || 0; } *host_count = \&no_visits; # backwards compatibility with LWP-5.02 sub host_wait { my($self, $netloc) = @_; return undef unless defined $netloc; my $last = $self->{'rules'}->last_visit($netloc); if ($last) { my $wait = int($self->{'delay'} * 60 - (time - $last)); $wait = 0 if $wait < 0; return $wait; } return 0; } sub simple_request { my($self, $request, $arg, $size) = @_; # Do we try to access a new server? my $allowed = $self->{'rules'}->allowed($request->uri); if ($allowed < 0) { # Host is not visited before, or robots.txt expired; fetch "robots.txt" my $robot_url = $request->uri->clone; $robot_url->path("robots.txt"); $robot_url->query(undef); # make access to robot.txt legal since this will be a recursive call $self->{'rules'}->parse($robot_url, ""); my $robot_req = HTTP::Request->new('GET', $robot_url); my $parse_head = $self->parse_head(0); my $robot_res = $self->request($robot_req); $self->parse_head($parse_head); my $fresh_until = $robot_res->fresh_until; my $content = ""; if ($robot_res->is_success && $robot_res->content_is_text) { $content = $robot_res->decoded_content; $content = "" unless $content && $content =~ /^\s*Disallow\s*:/mi; } $self->{'rules'}->parse($robot_url, $content, $fresh_until); # recalculate allowed... $allowed = $self->{'rules'}->allowed($request->uri); } # Check rules unless ($allowed) { my $res = HTTP::Response->new( &HTTP::Status::RC_FORBIDDEN, 'Forbidden by robots.txt'); $res->request( $request ); # bind it to that request return $res; } my $netloc = eval { local $SIG{__DIE__}; $request->uri->host_port; }; my $wait = $self->host_wait($netloc); if ($wait) { if ($self->{'use_sleep'}) { sleep($wait) } else { my $res = HTTP::Response->new( &HTTP::Status::RC_SERVICE_UNAVAILABLE, 'Please, slow down'); $res->header('Retry-After', time2str(time + $wait)); $res->request( $request ); # bind it to that request return $res; } } # Perform the request my $res = $self->SUPER::simple_request($request, $arg, $size); $self->{'rules'}->visit($netloc); $res; } sub as_string { my $self = shift; my @s; push(@s, "Robot: $self->{'agent'} operated by $self->{'from'} [$self]"); push(@s, " Minimum delay: " . int($self->{'delay'}*60) . "s"); push(@s, " Will sleep if too early") if $self->{'use_sleep'}; push(@s, " Rules = $self->{'rules'}"); join("\n", @s, ''); } 1; __END__ =head1 NAME LWP::RobotUA - a class for well-behaved Web robots =head1 SYNOPSIS use LWP::RobotUA; my $ua = LWP::RobotUA->new('my-robot/0.1', 'me@foo.com'); $ua->delay(10); # be very nice -- max one hit every ten minutes! ... # Then just use it just like a normal LWP::UserAgent: my $response = $ua->get('http://whatever.int/...'); ... =head1 DESCRIPTION This class implements a user agent that is suitable for robot applications. Robots should be nice to the servers they visit. They should consult the F file to ensure that they are welcomed and they should not make requests too frequently. But before you consider writing a robot, take a look at . When you use a I object as your user agent, then you do not really have to think about these things yourself; C files are automatically consulted and obeyed, the server isn't queried too rapidly, and so on. Just send requests as you do when you are using a normal I object (using C<< $ua->get(...) >>, C<< $ua->head(...) >>, C<< $ua->request(...) >>, etc.), and this special agent will make sure you are nice. =head1 METHODS The LWP::RobotUA is a sub-class of LWP::UserAgent and implements the same methods. In addition the following methods are provided: =over 4 =item $ua = LWP::RobotUA->new( %options ) =item $ua = LWP::RobotUA->new( $agent, $from ) =item $ua = LWP::RobotUA->new( $agent, $from, $rules ) The LWP::UserAgent options C and C are mandatory. The options C, C and C initialize attributes private to the RobotUA. If C are not provided, then C is instantiated providing an internal database of F. It is also possible to just pass the value of C, C and optionally C as plain positional arguments. =item $ua->delay =item $ua->delay( $minutes ) Get/set the minimum delay between requests to the same server, in I. The default is 1 minute. Note that this number doesn't have to be an integer; for example, this sets the delay to 10 seconds: $ua->delay(10/60); =item $ua->use_sleep =item $ua->use_sleep( $boolean ) Get/set a value indicating whether the UA should sleep() if requests arrive too fast, defined as $ua->delay minutes not passed since last request to the given server. The default is TRUE. If this value is FALSE then an internal SERVICE_UNAVAILABLE response will be generated. It will have an Retry-After header that indicates when it is OK to send another request to this server. =item $ua->rules =item $ua->rules( $rules ) Set/get which I object to use. =item $ua->no_visits( $netloc ) Returns the number of documents fetched from this server host. Yeah I know, this method should probably have been named num_visits() or something like that. :-( =item $ua->host_wait( $netloc ) Returns the number of I (from now) you must wait before you can make a new request to this host. =item $ua->as_string Returns a string that describes the state of the UA. Mainly useful for debugging. =back =head1 SEE ALSO L, L =head1 COPYRIGHT Copyright 1996-2004 Gisle Aas. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Debug.pm000064400000005006147633770250006147 0ustar00package LWP::Debug; # legacy require Exporter; @ISA = qw(Exporter); @EXPORT_OK = qw(level trace debug conns); use Carp (); my @levels = qw(trace debug conns); %current_level = (); sub import { my $pack = shift; my $callpkg = caller(0); my @symbols = (); my @levels = (); for (@_) { if (/^[-+]/) { push(@levels, $_); } else { push(@symbols, $_); } } Exporter::export($pack, $callpkg, @symbols); level(@levels); } sub level { for (@_) { if ($_ eq '+') { # all on # switch on all levels %current_level = map { $_ => 1 } @levels; } elsif ($_ eq '-') { # all off %current_level = (); } elsif (/^([-+])(\w+)$/) { $current_level{$2} = $1 eq '+'; } else { Carp::croak("Illegal level format $_"); } } } sub trace { _log(@_) if $current_level{'trace'}; } sub debug { _log(@_) if $current_level{'debug'}; } sub conns { _log(@_) if $current_level{'conns'}; } sub _log { my $msg = shift; $msg .= "\n" unless $msg =~ /\n$/; # ensure trailing "\n" my($package,$filename,$line,$sub) = caller(2); print STDERR "$sub: $msg"; } 1; __END__ =head1 NAME LWP::Debug - deprecated =head1 DESCRIPTION LWP::Debug used to provide tracing facilities, but these are not used by LWP any more. The code in this module is kept around (undocumented) so that 3rd party code that happen to use the old interfaces continue to run. One useful feature that LWP::Debug provided (in an imprecise and troublesome way) was network traffic monitoring. The following section provide some hints about recommened replacements. =head2 Network traffic monitoring The best way to monitor the network traffic that LWP generates is to use an external TCP monitoring program. The Wireshark program (L) is higly recommended for this. Another approach it to use a debugging HTTP proxy server and make LWP direct all its traffic via this one. Call C<< $ua->proxy >> to set it up and then just use LWP as before. For less precise monitoring needs just setting up a few simple handlers might do. The following example sets up handlers to dump the request and response objects that pass through LWP: use LWP::UserAgent; $ua = LWP::UserAgent->new; $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable()); $ua->add_handler("request_send", sub { shift->dump; return }); $ua->add_handler("response_done", sub { shift->dump; return }); $ua->get("http://www.example.com"); =head1 SEE ALSO L DebugFile.pm000064400000000053147633770250006744 0ustar00package LWP::DebugFile; # legacy stub 1; MemberMixin.pm000064400000001516147633770250007337 0ustar00package LWP::MemberMixin; sub _elem { my $self = shift; my $elem = shift; my $old = $self->{$elem}; $self->{$elem} = shift if @_; return $old; } 1; __END__ =head1 NAME LWP::MemberMixin - Member access mixin class =head1 SYNOPSIS package Foo; require LWP::MemberMixin; @ISA=qw(LWP::MemberMixin); =head1 DESCRIPTION A mixin class to get methods that provide easy access to member variables in the %$self. Ideally there should be better Perl language support for this. There is only one method provided: =over 4 =item _elem($elem [, $val]) Internal method to get/set the value of member variable C<$elem>. If C<$val> is present it is used as the new value for the member variable. If it is not present the current value is not touched. In both cases the previous value of the member variable is returned. =back