#!/usr/local/bin/perl # # Bucket "lid", version 2.0 # # 7/18/98 M.L.Nelson@larc.nasa.gov # # based on various prototypes by: # # Michael Nelson # Del Croom # # and some good discussions with: # # Dan Page # David Bianco # #---------------------------------------------------------------------------- # Global variables that change value are in UPPERCASE # They are: # $BROWSER -- flag to indicate access by a browser if set to 1, which # causes all output to be HTTP compliant (headers) # $BIBFILE -- the canonical metadata file. too many places need it. # @BIBFILE -- contents of [handle].bib # %BIB_FIELDS -- metadata separate by fields # @TC -- the terms and conditions associated with the current method # # Other globals are set in &init and should only change for implementation # reasons. # ######################################################################## # MAIN { &init; $input = &ReadParse; # if no input, default method is display if ( !$input ) { $in{"method"} = "display"; } &parse_metadata; &call_method; } ######################################################################## # Subroutine: init # Purpose: lots of initialization stuff... # Called by main # Created: 07/27/98 MLN ######################################################################## sub init { # global file name variables $metadata_dir = "_md.pkg"; $log_dir = "_log.pkg"; $log_file = "$log_dir/access.log"; $bucket_source_file = "index.cgi"; $mime_file = "_http.pkg/mime.e"; $encoding_file = "_http.pkg/encoding.e"; $cgilib_file = "_http.pkg/cgi-lib.pl"; $method_dir = "_methods.pkg"; $tc_dir = "_tc.pkg"; $principal_file = "$tc_dir/passwd"; $state_dir = "_state.pkg"; # for when we are running 1 directory down push(@INC,".."); # stuff for file upload... # take from S. E. Brenner's cgi-lib.pl # http://cgi-lib.stanford.edu/ # an uploaded element file will be stored in $in{'upfile'} require "$cgilib_file"; $cgi_lib'maxdata = 5000000; # should be bigger? -- mln # tokens $author_delimiter = ":::"; $myurlhere = "MYURLHERE"; $current_dir = "CURRENT_DIR"; # mime & encoding require "$mime_file"; require "$encoding_file"; # thumbnail / scan stuff $thumbnail_increment = 10; # set STDERR to be STDOUT; so any errors will print to STDOUT... open(STDERR,">&STDOUT"); select(STDOUT); $| =1; # terms and conditions stuff $restricted = "restricted/"; $dummy_user = "_Phil_is_Cool_"; $template_htaccess = "$tc_dir/htaccess.template"; $real_htaccess = "$tc_dir/.htaccess"; $noaccess_htaccess = "$tc_dir/htaccess.noaccess"; # if we are the restricted version, we need to go up 1 directory if (&running_restricted) { chdir (".."); } # set the real current dir here # note -- I'm not sure how portable this will be... # I wish I did not have to require pwd.pl... - mln require "pwd.pl"; &initpwd; $real_current_dir = $ENV{'PWD'}; } ######################################################################## # Subroutine: fix_htaccess # Purpose: the .htaccess file for http user authentication requires # an absolute pathname for the passwd file. of course, # absolute pathnames are the bane of bucketness. # so we copy a template .htaccess to the real .htaccess # replacing a token with the current dir # Called: init, tc # Created: 08/19/1998 MLN ######################################################################## sub fix_htaccess { open (H,"$template_htaccess"); while () { $line = $_; if ($line =~ /$current_dir/) { $line =~ s/$current_dir/$real_current_dir/g; } push(@htaccess_lines,$line); } close (H); open (H,">$real_htaccess"); chmod (0666, $real_htaccess); foreach $line (@htaccess_lines) { print H "$line"; } close (H); } ######################################################################## # Subroutine: running_restricted # Purpose: returns 1 if we are running as restricted, 0 if not # Called: init, tc # Created: 08/19/1998 MLN ######################################################################## sub running_restricted { local($base) = &MyBaseUrl; if ($base =~ /$restricted/) { return(1); } else { return(0); } } ######################################################################## # Subroutine: parse_metadata # Purpose: Parse the bibliography (BIBFILE) according to the native # protocol, derive NCSTRL+ (RFC 1807) tags, and write # translation to @BIBFILE # the .bib file is canonical; other formats are derived # from it. # Called by main # Created: 08/06/1997 Del Croom (d.r.croom@larc.nasa.gov) # Updated: 10/04/1998 MLN ######################################################################## sub parse_metadata { opendir(DIR,"$metadata_dir"); # open metadata directory @files = grep (/\.bib$/, (readdir(DIR))); # read all .bib files if (@files) { $BIBFILE = $files[0]; # grab the 1st bib file } else { &complain("cannot find a RFC 1807 bib file!"); } closedir(DIR); # close directory filehandle $BIBFILE = "$metadata_dir" . "/" . "$BIBFILE"; # RFC 1807 is the canonical format for buckets &read_metadata_rfc1807($BIBFILE); } ######################################################################## # Subroutine: read_metadata_rfc1807 # Purpose: Read BIBFILE and stuff into @BIBFILE (NCSTRL+ default) # updated: also parses and populates @BIB_FIELDS # Called by parse_metadata # Created: 08/07/1997 Del Croom (d.r.croom@larc.nasa.gov) # Updated: 08/03/1998 MLN ######################################################################## sub read_metadata_rfc1807 { local($bibfile) = @_; local($more_abstract); # copy contents of the [handle].bib file to array @BIBFILE open(BIB,$bibfile); # open the bibfile for reading while() { # cycle through line by line... push(@BIBFILE,$_); # push current line into array } close(BIB); } ######################################################################## # Subroutine: read_tc_file # Purpose: reads in the tc file, populates @TC # Called by: tc # Created: 08/17/98 MLN ######################################################################## sub read_tc_file { local($tc_file) = @_; open(T,"$tc_file"); while () { $line = $_; chop ($line); push(@TC,$line); if ($line =~ /^user:/) { $line =~ s/^user://; push (@TC_USERS,$line); } elsif ($line =~ /^host:/) { $line =~ s/^host://; push (@TC_HOSTS,$line); } elsif ($line =~ /^addr:/) { $line =~ s/^addr://; push (@TC_ADDRS,$line); } elsif ($line =~ /^package:/) { $line =~ s/^package://; push (@TC_PACKAGES,$line); } elsif ($line =~ /^element:/) { $line =~ s/^element://; push (@TC_ELEMENTS,$line); } # ignore lines that don't begin with: # user:, host:, addr:, package:, element: } close (T); } ######################################################################## # Subroutine: enforce_tc # Purpose: does the actual checks on the @TC arrays # Called by: tc # Created: 08/18/98 MLN ######################################################################## sub enforce_tc { local($user,$host,$addr,$package,$element) = @_; # you can protect: # 1. a method # 2. an entire package # 3. a package/element combo # # note: you *cannot* combine cases 2 and 3. either restrict entire # packages, or restrict package/elements. combinations will # not work... this is probably a bug. -- mln # # you can restrict on: # 1. user # 2. hostname # 3. ip address # # note: the hosts specified in the .tc file must be FQDN # also, we currently don't have a canonicalization routine # to handle aliases for hosts... # case 1: an entire method is protected # there will be no values in @TC_PACKAGES or @TC_ELEMENTS if (!(@TC_PACKAGES) && !(@TC_ELEMENTS) ) { if ( grep(/$user/,@TC_USERS) && (grep {$user =~ /$_/} @TC_USERS ) && (grep {$addr =~ /$_/} @TC_ADDRS ) ) { return (1); } else { return (0); } } # end case 1 # case 2: an entire package is protected. if someone is invoking # this method w/ a different package, let them pass # they will be requesting a package, but no @TC_ELEMENTS # will be specified if ( ($package) && !(@TC_ELEMENTS) ) { if (grep(/$package/,@TC_PACKAGES)) { # if we're here, then $package is in @TC_PACKAGES # meaning its protected if ( grep(/$user/,@TC_USERS) && (grep {$host =~ /$_/} @TC_HOSTS ) && (grep {$addr =~ /$_/} @TC_ADDRS ) ) { return (1); } else { return (0); } } else { # if we're here, then $package is not protected # so let it pass through return (1); } } # end case 2 # case 3: protect an individual element and package combination # if either don't match the protected list, let it pass if ( ($package) && ($element) ) { if ( (grep(/$package/,@TC_PACKAGES)) && (grep(/$element/,@TC_ELEMENTS)) ) { # the package *and* element are on the protected lists if ( grep(/$user/,@TC_USERS) && (grep {$host =~ /$_/} @TC_HOSTS ) && (grep {$addr =~ /$_/} @TC_ADDRS ) ) { return (1); } else { return (0); } } else { # one or both of $package & $element are not protected # just pass through return (1); } } # end case 3 # let's try a default value of pass... have we missed any cases? return(1); } ######################################################################## # Subroutine: tc # Purpose: checks terms and conditions for each of the methods # Called by call_method # Created: 07/18/98 MLN ######################################################################## sub tc { local($method) = @_; local($package) = $in{"pkg_name"}; local($element) = $in{"element_name"}; local($user) = $ENV{'REMOTE_USER'}; local($host) = $ENV{'REMOTE_HOST'}; local($addr) = $ENV{'REMOTE_ADDR'}; $tc_file = "$tc_dir/$method.tc"; if (-f $tc_file) { # see what is in the TC file &read_tc_file($tc_file); # if host or addr empty, add current values to # @TC_HOSTS, @TC_ADDRS so these arrays will always have # values (makes some of the logic easier) if (!(@TC_HOSTS)) { push(@TC_HOSTS,$host); } if (!(@TC_ADDRS)) { push(@TC_ADDRS,$addr); } # if we are a restricted version, check TC values if (&running_restricted) { if (&enforce_tc($user,$host,$addr,$package,$element) ) { # everything is ok, so pass through } else { &http_header("text/plain"); &complain("($user, $host, $addr) not allowed for method = $method package = $package element = $element"); } } else # not running as restricted { if (@TC_USERS) { # do the user restrictions match our # package and/or element? # note: this only does 1 p/e per method... # must think of fixing that if ( ( !(@TC_ELEMENTS) && grep(/$package/,@TC_PACKAGES) && ($package) ) || ( grep(/$package/,@TC_PACKAGES) && grep(/$element/,@TC_ELEMENTS) && ($package) && ($element) ) || ( !(@TC_ELEMENTS) && !(@TC_PACKAGES) ) ) { # redirect as restricted # we must get the user info via http $restricted_url = &MyBaseUrl; $restricted_url =~ s/index.cgi//g; $restricted_url .= $restricted; if ($ENV{"QUERY_STRING"}) { $restricted_url = "$restricted_url?$ENV{'QUERY_STRING'}"; } &fix_htaccess; print "Location: $restricted_url\n\n"; } else # user info not needed here { # just pass through } } else # no users defined { # enforce the restrictions we do have # must be hosts or addrs # push $dummy_user onto @TC_USERS and # pass $dummy_user to &enforce_tc since # any value is ok if (!(@TC_USERS)) { push(@TC_USERS,$dummy_user); } if (&enforce_tc($dummy_user,$host,$addr, $package,$element)) { # everything is ok, so pass through } else { &http_header("text/plain"); &complain("($user, $host, $addr) not allowed for method = $method package = $package element = $element"); } } } } else # there is no .tc file for this method { # do nothing; no TC attached to this method. } } ######################################################################## # Subroutine: call_method # Purpose: To invoke called method sent to script. # Method MUST be the first argument passed. # Called by main # Created: 08/06/1997 Del Croom (d.r.croom@larc.nasa.gov) # Updated: 07/27/1998 MLN ######################################################################## sub call_method { local ($method) = $in{"method"}; $method_file = "$method_dir/$method.pl"; if (-f $method_file) { # check tc; load the file; call the method &tc($method); # if we made it out of &tc, we must be ok... require "$method_file"; &$method; } else { &unsupported($method); } } ######################################################################## # Subroutine: footer # Use: Appends common info to end of webpages # Called by all methods requiring html output # Created: 07/23/1997 Del Croom (d.r.croom@larc.nasa.gov) ######################################################################## sub footer { print "
\nNCSTRL+
\n"; print "This server operates at NASA Langley Research Center and "; print "Old Dominion University.
\n"; print "Send email to "; print "m.l.nelson\@larc.nasa.gov\n"; print "\n\n"; } ######################################################################## # Subroutine: log # Purpose: Opens the bucket logfile (bucket.log) and appends an entry # as described by the calling routine # Called by most methods # Created 08/08/97 Del Croom (d.r.croom@larc.nasa.gov) # Updated: 08/20/98 MLN ######################################################################## sub log { local($action, $status, $msg) = @_; if ($ENV{'REMOTE_HOST'}) { # requester's IP hostname $host=$ENV{'REMOTE_HOST'}; } elsif ($ENV{'REMOTE_ADDR'}) { $host=$ENV{'REMOTE_ADDR'}; } else { $host="(command line)"; } $agent=$ENV{'HTTP_USER_AGENT'}; # requester's browser/server type @now=localtime(time); # get current timestamp ($min,$hr,$day,$mon,$yr)=@now[1..5]; # extract fields from @now @months=('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'); if ($yr <= 99) {$yr += 1900;} # fix the "Year 2000" problem else{$yr += 2000; # but not the "Year 2100+" problem ;-> } # open logfile and append entry which looks like: # # dosxx.larc.nasa.gov [Aug-08-1997] 21:43 delete_package OK "P1 removed" Mozilla/4.01 # open(LOG,">>$log_file"); chmod (0666, $log_file); # world writable printf LOG "%-30s [$months[$mon]-%02d-%04d] %02d:%02d",$host,$day,$yr,$hr,$min; print LOG " $action $status \"$msg\" $agent\n"; close(LOG); } ######################################################################## # Subroutine: http_header # Use: sets the MIME type # Updated: 07/27/1998 MLN ######################################################################## sub http_header { local ($type) = @_; print "Content-type: $type\n\n"; } ######################################################################## # Subroutine: file_size # Use: return the filesize # Called by: display_default # Created: 08/04/98 MLN ######################################################################## sub file_size { local($file) = @_; local($bytes); # if readable file, and a plain file... if ( (-r $file) && (-f $file) ) { $bytes = (-s $file); # we could pretty-process $bytes, but currently do not return "($bytes bytes)"; } else { return (""); } } ######################################################################## # Subroutine: shorten_bibfile # Use: deletes package or element info from the bib file # Created: 07/27/98 Michael Nelson ######################################################################## sub shorten_bibfile { local($bibfile,$type,$name) = @_; open (BIB, ">$bibfile"); chmod (0666,$bibfile); # world writable for testing foreach $line (@BIBFILE) { if (($line =~ /^$type-END::/) && ($line =~ /$name/) ) { $skip = 0; next; } if (($line =~ /^$type/) && ($line =~ /$name/) ) { $skip = 1; next; } if ($skip) { next; } else { print BIB "$line"; } } close (BIB); } ######################################################################## # Subroutine: append_bibfile # Use: Adds new info to the bibfile # Created: 07/26/98 Michael Nelson ######################################################################## sub append_bibfile { local ($bibfile, $newinfo,$endtag) = @_; open (BIB, ">$bibfile"); chmod (0666,$bibfile); # world writable for testing $newinfo =~ s/\r//g; # from Ajoy, 9/18/98 foreach $line (@BIBFILE) { $line =~ s/\r//; # from Ajoy, 9/18/98 if ($line =~ /$endtag/) { # this should handle both new packages, and # elements being placed within a package print BIB "$newinfo\n"; print BIB "$line"; } else { print BIB "$line"; } } } ######################################################################## # Subroutine: unsupported # Use: Notifies requester that an unsupported method was called # Created: placeholder for interpreter ######################################################################## sub unsupported { local($method) = @_; &http_header("text/html"); print "\n\nNCSTRL+ Placeholder\n\n"; print "\n

NCSTRL+ Placeholder

\n"; print "

An UNSUPPORTED Method $method was called

\n
\n"; &footer; # print HTML footer &log("unsupported","ERR","$in{'method'} not implemented"); } ######################################################################## # Subroutine: name_collision # Use: returns unique names # Created: 9/13/98 MLN ######################################################################## sub name_collision { local ($pkg_name, $element_name) = @_; local ($counter=0); while (-e "$pkg_name/$element_name") { $element_name =~ s/\.pkg$//g; $element_name =~ s/\.$counter$//g; $counter += 1; $element_name .= ".$counter.pkg"; } return ($element_name); } ######################################################################## # Subroutine: complain # Use: a nasty gram to the user # Created: 7/28/98 MLN ######################################################################## sub complain { local($msg) = @_; print ("$msg\n"); &log("complain","ERR","quit: $msg"); exit (0); } # END