#!/usr/local/bin/perl5 # # File: sproxy.pl # Author: Adam Janin (janin@cs.berkeley.edu) # Date: 1/18/97 # Ver: 0.0 (alpha, pre-alpha, whatever) # # Proxy server that adds link labels and JavaScript hotkeys. # # Copyright (C) 1998 Regents of the University of California. # All rights reserved. # # Disclaimer: This software is provided "as is". I take no # responsibility for any problems resulting from its use. # # You may use this software in any way as long as: # o You do not charge for it. # o The original author and copyright information is retained. # # The proxy part of this code is based primarily on phd, a perl # proxy server written by Jerry LeVan (levan@eagle.eku.edu). # # See http://www.icsi.berkeley.edu/~janin/sproxy/sproxy.html for # additional information (including installation, known bugs, etc). # require 5.004; require HTML::TreeBuilder; require HTML::Element; use URI::URL; use Socket; use Getopt::Std; # # User Configuration # # Change if port 8000 is busy or you want to use a "standard" port (e.g. 80). $port = 8000; # Change if you don't like the name or want to provide an absolute path. $logfile = "proxy.log"; # You should probably change these to something reasonable. A * in a # field means "any". To connect, you must match @accesslist and not # match @denylist. @accesslist = ("128.32.201.*"); # Anyone on our subnet @denylist = (); # If $verbose is 0, you only get startup and shut down messages. # If $verbose is 1, you get access log messages. # If $verbose is 2, you get a bit more. # If $verbose is 3, you get a ton. Don't do this unless you're sure! $verbose = 0; # # You shouldn't have to change anything below this line # $version = "0.0"; $program = "sproxy"; open(LOG,">>$logfile") || die "Can't open log file" ; select(LOG) ; $| = 1; select(STDOUT); # # Elements to add labels # %labeledElements = ( 'a' => 'href' ); # # Set up the JavaScript that gets added. # Note that "var jLinks = [ "http://...", "..." ];" gets prepended # to this on a per-page basis. # # If you're on a UNIX machine, you may need to manually add CR/LF pairs. # # Sorry for the lack of comments in the JavaScript. The original parser # couldn't handle comments, so I removed them all. # # Ascii Char # 103 g # 115 s # 13 return # 48 0 (zero) # 57 9 # # jState = 0, start state # = 1, have seen a 's' or 'g', parsing # $ScriptText = <<'EOS'; var jKeyBuf = []; var jState = 0; var jOp = 0; var jOldStatus = window.defaultStatus; function jhandleKey(e) { if (e.target.type != "undefined") { return true; } if (jState == 0) { if (e.which == 103 || e.which == 115) { if (e.type == "keydown") { jState = 1; jOp = e.which; jKeyBuf.length = 0; jOldStatus = window.defaultStatus; window.defaultStatus = String.fromCharCode(e.which); } return false; } else { if (e.type == "keydown") { window.defaultStatus = jOldStatus; } return true; } } else if (jState == 1) { if (e.which == 13) { if (jKeyBuf.length == 0) { if (e.type == "keydown") { jState = 0; window.defaultStatus = jOldStatus; } return true; } if (e.type == "keydown") { window.defaultStatus = jOldStatus; if (jOp == 103) { jLinkTo(jParseBuf()); } else if (jOp == 115) { jShowLink(jParseBuf()); } jState = 0; } return false; } else if (48 <= e.which && e.which <= 57) { if (e.type == "keydown") { jKeyBuf[jKeyBuf.length] = e.which; window.defaultStatus += String.fromCharCode(e.which); } return false; } else { if (e.type == "keydown") { jState = 0; window.defaultStatus = jOldStatus; } return true; } } else { alert("Illegal State!"); return true; } } function jParseBuf() { var total = 0; var i; for (i = 0; i < jKeyBuf.length; i++) { total = total * 10 + jKeyBuf[i] - 48; } return total; } function jLinkTo(index) { if (0 <= index && index < jLinks.length) { window.location.href=jLinks[index]; } } function jShowLink(index) { if (0 <= index && index < jLinks.length) { window.status = jLinks[index]; } } document.captureEvents(Event.KEYUP); document.captureEvents(Event.KEYDOWN); document.captureEvents(Event.KEYPRESS); document.onKeyUp = jhandleKey; document.onKeyDown = jhandleKey; document.onKeyPress = jhandleKey; EOS # # Initialize port, standard perl server boilerplate # $sockaddr = 'S n a4 x8'; ($name, $aliases, $proto) = getprotobyname('tcp'); $thisport = pack($sockaddr, &AF_INET, $port, "\0\0\0\0"); # wildcard addr socket(S, &PF_INET, &SOCK_STREAM, $proto) || die "can't create socket: $!\n"; setsockopt(S, &SOL_SOCKET, &SO_REUSEADDR, pack('i', 1)) || die "can't setsockopt: $!\n"; bind(S,$thisport) || die "can't bind socket: $!\n"; listen(S,5) || die "can't listen to socket: $!\n"; # Don't show the environment of the invoker of this server. foreach $key (keys %ENV) { delete $ENV{$key}; } print LOG ×tamp, " Starting sproxy on port $port, PID = $$\n"; # Answer if someone knocks on the port. for (;;) { if(!accept(NS,S)) { print LOG "Accept Failure, shutting down sproxy, error: $!\n"; exit 1; } # Set the environment. SetupCommand(NS); # Check to see if they can access the server. if(!CheckAccess($ENV{REMOTE_ADDR},@accesslist)) { &ErrorMessage(NS,400,"Access Denied"); exit 0; # Bail out } # Check to see if the caller is explicitly denied access to the server. if(CheckAccess($ENV{REMOTE_ADDR},@denylist)) { &ErrorMessage(NS,400,"Access Denied"); exit 0; # Bail out } # Try to do the proxy thing DoCommand(NS); } # Sets many enviromental variables # # I don't understand why we need this -- AJ # sub SetupCommand { my $sock= $_[0]; $ENV{SERVER_SOFTWARE} = "$program/$version"; my $rem_ip_addr = (unpack($sockaddr, getsockname($sock)))[2]; $ENV{SERVER_NAME} = (gethostbyaddr($rem_ip_addr, &AF_INET))[0]; $ENV{GATEWAY_INTERFACE} = "CGI/1.1"; $ENV{SERVER_PROTOCOL} = "http/1.0"; $ENV{SERVER_PORT} = "$port"; $rem_ip_addr = (unpack($sockaddr, getpeername($sock)))[2]; $ENV{REMOTE_HOST} = (gethostbyaddr($rem_ip_addr, &AF_INET))[0]; $ENV{REMOTE_ADDR} = join(".", unpack("C4", $rem_ip_addr)); } # Print an error message to the client sub ErrorMessage { my($fd,$error,$message) =@_; print $fd "HTTP/1.0 200 OK\n"; print $fd "Content-type: text/html\n\n"; print $fd "Error Message"; print $fd "

Error $error

"; print $fd "
\n"; print $fd $message; print $fd "
\n"; close($fd); } # Match client IP against access lists, returns true if match else false sub CheckAccess { my ($who,@list) = @_; my $pattern; foreach $pattern (@list) { $pattern =~ s/\./\\./g ; # replace . by \. $pattern =~ s/\*/\\d\+/g; # replace * by \d+ return 1 if $who =~ /$pattern/; } return 0; # search failed } # # Generate a time stamp for the log files... # sub timestamp { my @months =("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep", "Oct","Nov","Dec"); my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time); sprintf("[%02u/%s/%02u:%02u:%02u:%02u]",$mday,$months[$mon],$year, $hour,$min,$sec); } sub DoCommand { my $socket = $_[0]; # socket to client my $server ; # socket to remote server my $commandline; # first line from client my @result; # In proxy mode, clients will send us # [GET|POST] proto://machine{:port}{/{file}} HTTP/n.m #get the first line from the client socket chomp($commandline = <$socket>); $commandline =~s/\r//; print LOG timestamp," $$ CMD: $commandline From:$ENV{REMOTE_ADDR}\n" if $verbose; my ($command,$target,$proto) = split(" ",$commandline,3); ErrorMessage($socket,500,"Only GET,POST can be used, $command not legal."),exit 1 unless $command =~ /(GET|POST)/; my ($protocol,$machine,$port,$file) = @result = parse($target); if((scalar @result) == 4) { # successful parse ErrorMessage($socket,500,"Only http allowed, $protocol is not legal."),exit 1 unless $protocol =~ /http/i; if(!defined $port && ($protocol eq "http")) { $port = 80 ; } # Connect to the requested server $server = GetSocket($machine,$port); if (!$server) { ErrorMessage($socket,500,"Can't Connect to $machine:$port"); } else { SendRequest($server,$socket,$command,$file,$proto) if( $protocol eq "http"); SendResponse($server,$socket, $target); } } else {ErrorMessage($socket,500,"Can't parse $target") ;exit(1); } } sub SendRequest { my ($server,$socket,$command,$file,$proto) = @_; my ($line,$bytes); my @prolog=(); #step 1, Send the modified original command to the server. push @prolog, "$command /$file $proto\r\n" ; #step 2, Read and transmit lines from the client to the server # noting a Content-length: header in case this is a POST operation. # we read until a blank line is found. while ( $line = <$socket>) { push @prolog , $line; last if $line =~ /^(\r\n|\n)$/; # get out of loop on empty line # Get size of following query string if Content-length specified chomp($bytes = (split(':',$line))[1] ) if $line =~ /Content-Length:/i; } # step 3 If this is a POST request then we must send the query data. if ( $command =~ /POST/) { read($socket,$line, $bytes); push @prolog, $line, "\r\n"; # make sure the lines are terminated } #send every thing to the server print $server @prolog; # copy to log if verbose set high enough print LOG timestamp, @prolog if $verbose > 1; # Done! return to caller and get response } # # This is where the labels get added and the JavaScript code is inserted. # sub SendResponse { my($server,$socket, $target) = @_; my $buffer; my $contlen; my $isHtml = 0; my @header; # Read and store header info until the first blank line. # Check if the content type is text/html. # Need to store the header to adjust the Content-Length field. while ($_ = <$server>) { last if /^[ \t\r]*$/; if (/Content-Type:.*text\/html/i) { $isHtml = 1; } if (/Content-Length:[ \t]*([0-9]+)/i) { $contlen = $1; } else { push(@header, $_); } } # If it's not text/html, just copy verbatim if (!$isHtml) { if ($verbose >= 3) { print LOG join('', @header); print LOG "Content-Length: $contlen\n\n" if $contlen; } print $socket join('', @header); print $socket "Content-Length: $contlen\n\n" if $contlen; # Copy 1024 at a time. while (read($server, $buffer, 1024)) { print $socket $buffer; } return; } # If it is text/html, parse it. my $h = new HTML::TreeBuilder; $h->ignore_unknown(0); $h->implicit_tags(0); $h->warn(1); $h->parse_file(\*$server); # Now go through and add the index labels and collect up the # destinations Also, find the "html" node while we're at it. my $count = 1; my $htmlnode; my $baseurl = url($target); # Base starts as current page. # "Secret" 0 link. my @linkList = ( "http://www.icsi.berkeley.edu/~janin/sproxy" ); $h->traverse(sub { my($self, $start, $depth) = @_; return 1 unless $start; my $tag = $self->tag(); if ($tag =~ /^html$/i) { $htmlnode = $self; } elsif ($tag =~ /^base$/i) { # If is specified, use it. # I have no idea if this scopes correctly. $baseurl = url($self->attr('href')); } my $attr = $labeledElements{$tag}; return 1 unless defined $attr; $attr = [$attr] unless ref $attr; for (@$attr) { my $val = $self->attr($_); if (defined $val) { # Given the current page or a tag, handle # transforming the URL in $val and add it to linkList. $val = url($val)->abs($baseurl)->as_string(); push(@linkList, $val); my $labnode = new HTML::TreeBuilder; $labnode->implicit_tags(0); $labnode->ignore_unknown(0); $labnode->warn(1); # Here it is... $labnode->parse(sprintf("[%d]", $count++)); # The next line is pretty weird. It's needed since # parse generates a full html document, not just a # fragment. We want just the interior of it. $self->push_content(($labnode->content())->[0]); last; } } 1; }, 'ignoretext'); # Now add the JavaScript stuff. my $sstr = "