#!/bin/sh # # Rover - retrieve and forward an item specified by a URL # # Use: # # If called without arguments, Rover returns a form-based HTML page. The # form asks the user to specify a desired URL, then calls Rover once again, # passing the desired URL as an argument. # # If called with the argument "URL=...", Rover retrieves the specified item, # passing it back to the client. If the retrieved item is in HTML format, # all URLs referenced in it are modified to invoke Rover. # # Rover should be installed in the normal directory for CGI scripts. # # Be SURE to use a name starting with "nph-" (e.g., nph-rover), # to force use as a non-parsed header script). # # Do NOT install Rover without considering the legal ramifications # of letting your system forward arbitrary proscribed material! # # Written 9602 by James Marshall, jsm@crl.com # Modified 9602 by Rich Morin, rdm@cfcl.com # # This code is distributed under the provisions of the GNU General Public # License. Contact gnu@prep.ai.mit.edu for more information. # # Version 1.0.4, 960326 /usr/local/bin/perl -e ' $* = 1; # allow multi-line matching &intro() if ($ENV{"QUERY_STRING"} eq ""); # Read the URL from the query input ($ENV{"QUERY_STRING"} =~ /^URL=([^&]*)/) || &HTMLdie("Bad Input:"); $URL = $1; # Remove Hex character encodings and such from the URL. $URL =~ s/\+/ /g; $URL =~ s/%([\da-fA-F]{2})/pack("c", hex($1))/ge; # parse the URL, simply ($host, $port, $path) = ($URL =~ m#//([^/:]*):?([^/]*)(/.*)?$#i); $port || ($port = 80); $path || ($URL .= $path = "/"); # Build the HTTP request string # Include GET line, From:, Accept:, User-Agent:, Referer: $user = $ENV{"REMOTE_IDENT"} || $ENV{"REMOTE_USER"} || "???"; $request = join("", "GET $path HTTP/1.0\r\n", "From: $user@" . $ENV{"REMOTE_HOST"} . "\r\n", "Accept: " . $ENV{"HTTP_ACCEPT"} . "\r\n", "User-Agent: " . $ENV{"HTTP_USER_AGENT"} . "\r\n", "Referer: " . $ENV{"HTTP_REFERER"} . "\r\n\r\n") ; # Connect socket to host; send request; wait with select(). &newsocketto(*S, $host, $port); print S $request; vec($rin = "", fileno(S), 1) = 1; select($rin, undef, undef, 60) || &HTMLdie("No response from $host:$port") ; # Read Status line; headers into $headers; and entire object body into $_ # Support both HTTP 1.x and HTTP 0.9 $status= $_ = ; # first line is the status line in HTTP 1.x if (m#^HTTP/#) { # HTTP 1.x print; do { $headers .= $_ = ; } until (/^(\r\n|\n)$/); # lines may be terminated with LF or CRLF undef $/; $_ = ; } else { # HTTP 0.9 undef $/; $_ .= ; # inefficient? } close(S); # setting these three vars is part of &fullurl(); placed here for speed. $urlstart = "http://" . $ENV{"SERVER_NAME"} . ":" . $ENV{"SERVER_PORT"} . $ENV{"SCRIPT_NAME"} . "?URL="; $basehost = "http://$host:$port"; $basepath = substr($URL, 0, rindex($URL, "/")+1); # If we get a 300-level response code, update the Location: header to # point back through the script, so the browser will retrieve it correctly. if ($status =~ m#^HTTP/[0-9.]*\s*3\d\d#) { $headers =~ s/^Location:\s*(.*)/"Location: " . &fullurl($1)/gie; $headers =~ s/^URI:\s*(.*)/"URI: " . &fullurl($1)/gie; } # Update all URLs in all tags that refer to URLs # Only update the URLs if it is an HTML file (or using HTTP 0.9)! if (($headers =~ m#^Content-type:\s*text/html#i) || !$headers) { # Remove Content-Length header, since we are changing the content # Would be better to insert correct content length. $headers =~ s/^Content-Length:.*\n//i; # $_ .= "\0" x (length/2) ; # pre-extend string for efficiency # This is a complete list of HTML tags that include a URL in an attribute $tag{"a" } = "href"; $tag{"img" } = "src"; $tag{"body" } = "background"; # Netscape only # $tag{"form" } = "action"; # forms not supported through Rover $tag{"input" } = "src"; # type must be image $tag{"link" } = "href"; $tag{"meta" } = "url"; # Netscape only # These are HTML 3.0 tags $tag{"area" } = "href"; $tag{"fig" } = "src"; $tag{"note" } = "src"; $tag{"overlay" } = "src"; $tag{"select" } = "src"; while (($t1, $t2) = each %tag) { s/(<$t1\b[^>]*\b$t2\s*=\s*"?)([^\s">]*)/$1.&fullurl($2)/gie; } $_ = "\n\n" . $_; } # print the headers, a comment, and the entire (possibly modified) file print $headers, $_; exit; #--------------------------------------------------------------------------- # Returns the full URL to query our script for an absolute URL. Includes an # abbreviation of relative->absolute URL conversion. It handles almost all # URLs that actually exist, though it does not contract "." and ".." paths. # # URLs it does not handle include "//www.host.com", "http:localpath/file", # "?newquery", and other incomplete (but valid) relative URLs. # # The full procedure is described in RFC 1808, section 4. # Note that the calculation of $urlstart, $basehost, and $basepath above # are an integral part of this routine, placed above for speed. # # If the chars in these URLs were not so predictable, we would url-encode them. sub fullurl { local($relurl) = @_ ; $relurl =~ m#^http://#i && return "$urlstart$relurl"; $relurl =~ m#^[\w+.-]*:#i && return $relurl; $relurl =~ m#^/# && return "$urlstart$basehost$relurl"; return "$urlstart$basepath$relurl"; } #--------------------------------------------------------------------------- # Returns an introductory page, containing an HTML form. sub intro { local($Rover) = "http://" . $ENV{"SERVER_NAME"} . ":" . $ENV{"SERVER_PORT"} . $ENV{"SCRIPT_NAME"}; print < Rover

Welcome to Rover!

Rover is an "HTTP Proxy in a Script". It defeats address-based Web censorship, allowing you to retrieve pages that your local authorities (e.g., company or government) do not wish you to see.

To use Rover, specify a desired URL (below). Rover will retrieve the item, returning it to your Web browser. If the item is an HTML page, Rover will edit it so that any links contained in it are referred back to Rover.

Caution:

Installation and/or use of Rover may get you into arbitrary amounts of trouble with your local authorities. Do not assume that your use of Rover is private. More generally, do not install or use Rover unless you are prepared to deal with the consequences!

Rover is intended for use by responsible adults; parents may therefore wish to add this URL to their web-filtering software. Please note, however, that any large-scale adoption of Rover will defeat this strategy. Consequently (as in most situations), parental supervision and authority will be required.

Enter the desired URL:

For more information:

Rover is described in "The Limits of Control" (The Internet Notebook, Rich Morin, UNIX Review, June 1996). An online copy of the article is available as http://www.ptf.com/tin/P/9606.html.

EOTXT exit; } #--------------------------------------------------------------------------- # The following subroutine looks messy, but can be used to open any # TCP/IP socket in any Perl program. Except for the &HTMLdie() part. sub newsocketto { local(*S, $host, $port) = @_; $AF_INET = 2 ; $SOCK_STREAM = 1; # Usually in /usr/include/sys/socket.h # Create the remote host data structure, from host name or IP address $hostaddr = ($host =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) ? pack("c4", $1, $2, $3, $4) # for IP address : ((gethostbyname($host))[4] # for alpha host name || &HTMLdie("Could not find address for $host")); $remotehost = pack("S n a4 x8", $AF_INET, $port, $hostaddr); # Create the socket and connect to the remote host socket(S, $AF_INET, $SOCK_STREAM, (getprotobyname("tcp"))[2]) || &HTMLdie("Could not create socket"); connect(S, $remotehost) || &HTMLdie("Could not connect to $host:$port"); select((select(S), $|=1)[0]); # unbuffer the socket } #--------------------------------------------------------------------------- # die, emitting an HTML nastygram sub HTMLdie { local($msg) = @_; print < Proxy-in-a-Script Error

$msg

Error message: $! EOF exit; } '