#!/usr/local/bin/./perl

# 4/13/96 Michael Smith 
# for WebGlimpse
# see http://glimpse.cs.arizona.edu/webglimpse for more information

# returns full search box (with preference to the referring page)
#########################################################################

# Installation-specific locations: the WebGlimpse tree, the name of the
# cgi-bin directory used in generated URLs, and the helper executables.
$WEBGLIMPSE_HOME = '/usr2/bgopal/webglimpse/webglimpse';
$CGIBIN          = 'wgcgi-bin';
$GLIMPSE_LOC     = '/home/bgopal/bin/./glimpse';
$CONVERT_LOC     = '/home/bgopal/bin/./wgconvert';

# Library directory; it is put at the front of @INC further down so that
# it is searched first when config.pl is required.
$WEBGLIMPSE_LIB = $WEBGLIMPSE_HOME . '/lib';

# Fixed file-name conventions.  $nh_pre is the prefix this script puts in
# front of a page's base name to find its neighborhood file.  The other
# names are not referenced in the code visible here -- presumably they
# are consumed by config.pl (TODO confirm).
$nh_pre     = '.nh.';
$REMOTEDIR  = '.remote';
$MAPFILE    = '.wgmapfile';
$CONFIGFILE = 'archive.cfg';

#########################################################################
### SITE CONF STUFF
#########################################################################
# Path of the site configuration file read by siteconf_ReadConf().
$wgConfPath = $WEBGLIMPSE_HOME . '/.wgsiteconf';
$prefix     = '^DirectoryIndex|^UserDir|^Alias|^ScriptAlias|^DocumentRoot';

# Site settings; all start out empty and are filled in by
# siteconf_ReadConf().  (The alias lists and the server cache of the
# original site-conf code are not kept by this script.)
$DirectoryIndex = $UserDir = $DocumentRoot = '';
$Port = $Server = $ServerAddress = '';
#########################################################################
### END SITE CONF STUFF
#########################################################################

# Give the WebGlimpse libraries priority over anything else in @INC,
# then load the archive-configuration routines.
unshift @INC, $WEBGLIMPSE_LIB;
require 'config.pl';

# Load the site configuration before any output is produced.
siteconf_ReadConf();
# start the header; the <title> is printed later, once the page title is known
print "Content-type: text/html\n\n";
print "<HEAD>\n";

# path info contains the archive directory
$archivepwd = $ENV{'PATH_INFO'};

# Raw query string exactly as the server passed it (no URL-decoding is
# done here).  An unset variable is treated as an empty query.
$query = $ENV{'QUERY_STRING'};
$query = "" unless defined $query;

# Copy every name=value pair of the query string into a package variable
# named $QS_<name>.
#  - split() is given an explicit target: the old implicit split-to-@_
#    was removed in Perl 5.12, which left every $QS_ variable undefined.
#  - The value is stored through a symbolic reference instead of a string
#    eval, so request data is never compiled as Perl code and a value
#    ending in a backslash no longer makes the assignment fail silently.
#  - Only the first '=' separates name and value, so values may contain '='.
#  - Single quotes are still removed from values, as before.
@qvars = split( /\&/, $query );
foreach my $pair (@qvars) {
   my ($fname, $fvalue) = split(/=/, $pair, 2);
   next unless defined $fname && $fname =~ /^[a-z_A-Z]\w*\z/;
   $fvalue = "" unless defined $fvalue;
   $fvalue =~ s/\'//g;
   no strict 'refs';
   ${"QS_$fname"} = $fvalue;
}

# query is the file ("" when no file parameter was given)
$file = defined $QS_file ? $QS_file : "";
# print "file: $file<br>\n";

# shownh=<non-zero> asks for the neighborhood list to be displayed
$show_neighborhood=0;
if($QS_shownh){
	$show_neighborhood=$QS_shownh;
}

# go read the config file in the archive; err_conf prints an error page
# and exits when the directory is not an archive
if(&TestConfig($archivepwd)==0){
	&err_conf();
}
($title, $archiveurl, $traverse_type, $explicit_only, $numhops,
 $nhhops, $local_limit, $remote_limit, $addboxes, @urllist) = ReadConfig($archivepwd);

# look for the title for this page
$thistitle = &lookup_title($file);
### TO DO -- error if we can't find the file's title; all indexed files should
###          have at least 'No title'

# Escape the characters that are special in HTML text and in double-quoted
# attribute values.  Returns "" for an undefined argument.
sub fullsearch_html_escape {
	my($text) = @_;
	$text = "" unless defined $text;
	$text =~ s/&/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;
	return $text;
}

# $file and $archivepwd come straight from the request (query string and
# PATH_INFO), so they are escaped -- and the attributes holding them are
# quoted -- wherever they are printed.  Titles are printed unchanged: they
# are treated as HTML fragments (lookup_title returns the raw text found
# between <title> tags).
$file_html = fullsearch_html_escape($file);
$archivepwd_html = fullsearch_html_escape($archivepwd);

if($file ne ""){
	print "<title>Full search from $thistitle</title>\n";
}else{
	print "<title>Full search</title>\n";
}
print "</head>\n\n";

# banner row: logo and product name, both linking to the WebGlimpse home
print "<body><center>\n";
print "<table border=5>\n";
print "<tr><td align=center valign=middle>\n";
print "<a href=http://glimpse.cs.arizona.edu/webglimpse>\n";
# the anchor around the logo was never closed in the original markup
print "<img src=$archiveurl/.glimpse-eye.jpg align=middle></a></td>\n";
print "<td align=center valign=middle>\n";
print "<a href=http://glimpse.cs.arizona.edu/webglimpse>\n";
print "<font size=+3>WebGlimpse </a> Search<br></font></td>\n";
print "</tr>\n\n";

# scope row: neighborhood of the referring page vs. the whole archive
print "<tr><td colspan=2>\n";
print "<FORM method=get ACTION=\"/$CGIBIN/webglimpse$archivepwd_html\">\n";
if($file ne ""){
	$link = &siteconf_LocalFile2Url($file);
	$link_html = fullsearch_html_escape($link);
	print "<input name=file type=hidden value=\"$file_html\">\n";
	print "Search:\n";
	print "<INPUT TYPE=radio NAME=scope VALUE=neighbor CHECKED>\n";
	print "The neighborhood of <a href=\"$link_html\">$thistitle</a>\n";
	print "<INPUT TYPE=radio NAME=scope VALUE=full>The full archive: $title\n";
}else{
	print "<center>Search on the entire archive.</center>\n";
}
print "</td></tr>\n\n";

# query row: search string and search options
print "<tr><td colspan=2>\n";
print "String to search for: <INPUT NAME=query size=30>\n";
print "<INPUT TYPE=submit VALUE=Submit>\n";
print "<br>\n";
print "<center>\n";
print "<INPUT NAME=case TYPE=checkbox>Case&#160;sensitive\n";
print "<!-- SPACES -->&#160;&#160;&#160;\n";
print "<INPUT NAME=whole TYPE=checkbox>Partial&#160;match\n";
print "<!-- SPACES -->&#160;&#160;&#160;\n";
print "<INPUT NAME=lines TYPE=checkbox>Jump&#160;to&#160;line\n";
print "<!-- SPACES -->&#160;&#160;&#160;\n";
print "<SELECT NAME=errors align=right>\n";
print "<OPTION>0\n";
print "<OPTION>1\n";
print "<OPTION>2\n";
print "</SELECT>\n";
print "misspellings&#160;allowed\n";
print "<br>\n";
print "</center>\n";
print "Return only files modified within the last <INPUT NAME=age size=5>\n";
print "days.\n";
print "<br>\n";
print "Maximum number of files returned:\n";
print "<SELECT NAME=maxfiles>\n";
print "<OPTION>10\n";
print "<OPTION selected>50\n";
print "<OPTION>100\n";
print "<OPTION>1000\n";
print "</SELECT>\n";
print "<br>Maximum number of matches per file returned:\n";
print "<SELECT NAME=maxlines>\n";
print "<OPTION>10\n";
print "<OPTION selected>30\n";
print "<OPTION>50\n";
print "<OPTION>500\n";
print "</SELECT>\n";
print "<br>\n";
print "</FORM>\n";
print "</td></tr>\n";

# copyright row
print "<tr><td colspan=2>\n";
print "<center>\n";
print "<font size=-2><a href=http://glimpse.cs.arizona.edu>\n";
print "Glimpse</a> and <a href=http://glimpse.cs.arizona.edu/webglimpse>\n";
print "WebGlimpse</a>, Copyright &copy; 1996, \n";
print "University of Arizona\n";
print "</center>\n";
print "</font></td></tr>\n";
print "</table></center>\n";
print "</center>\n\n<p>";

$path_info = $ENV{'PATH_INFO'};

if($file ne ""){
	if($show_neighborhood!=0){
		# now add the files in that neighborhood
		print "<hr><br>Pages in the neighborhood of $file_html:\n";
		$neighborhood="$file";
		# add the prefix to the last path component
		$neighborhood=~ s/([^\/]+)$/$nh_pre$1/;

		# The neighborhood file is run through wgconvert (-U is not used, so
		# that all information is kept).  The program is started with a
		# fork-open and a list-form exec instead of a shell command line, so
		# that shell metacharacters in the request-supplied file name or
		# PATH_INFO can never be executed.  As before, the neighborhood file
		# is the program's standard input and its stderr is merged into the
		# output read here.
		@convert_args = ("-in", "-H");
		push(@convert_args, $path_info) if defined $path_info && $path_info ne "";

		# flush what has been printed so far, so that the child does not
		# inherit (and possibly repeat) buffered output
		$| = 1;

		$pid = open(NH, "-|");
		if (!defined $pid) {
			print "No neighborhood file found.\n";
		}
		elsif ($pid == 0) {
			# child: its STDOUT is the pipe read by the parent
			open(STDIN, "<", $neighborhood) || exit 1;
			open(STDERR, ">&STDOUT");
			exec { $CONVERT_LOC } $CONVERT_LOC, @convert_args;
			exit 1;
		}
		else {
			print "<ul>\n";
			while(my $nhline = <NH>){
				chomp $nhline;
				# a line is read as: <file> <url> <title words ...>
				my @fields = split(" ", $nhline);
				next unless @fields;            # ignore blank lines
				my $output = shift(@fields);
				my $nhlink = shift(@fields);
				$nhlink = "" unless defined $nhlink;
				my $nhtitle = join(" ", @fields);

				# skip if it's the file itself; \Q..\E keeps request data
				# from being interpreted as a regular expression
				next if ($output =~ /^\Q$archivepwd\E\/\Q$file\E$/);

				### TO DO -- change output to filename

				my $nhlink_html = fullsearch_html_escape($nhlink);
				# if there is no title, use the 'real' path
				if($nhtitle eq "No Title" || $nhtitle eq ""){
					$nhtitle = $nhlink_html;
				}

				print "<li><a href=\"$nhlink_html\">$nhtitle</a>\n";
			}
			close(NH);
		}
	}else{
		# offer a link that reloads this page with the neighborhood listed
		print "<center><h3>\n";
		print "<a href=\"/$CGIBIN/webglimpse-fullsearch${archivepwd_html}?file=${file_html}&amp;shownh=1\">List the neighborhood</a> of \"$thistitle\"<p>\n";
		print "</h3></center>\n";
	}
}
		
print "</body>\n";
print "</html>\n";




#########################################################################
# Print the "not an archive" error page and terminate the script.
# Expects the Content-type header and the opening <HEAD> to have been
# printed already.  Reads the global $archivepwd; never returns.
sub err_conf{
	# $archivepwd is taken from PATH_INFO, i.e. from the request, so it is
	# HTML-escaped before being echoed back into the page.
	my $shown = defined $archivepwd ? $archivepwd : "";
	$shown =~ s/&/&amp;/g;
	$shown =~ s/</&lt;/g;
	$shown =~ s/>/&gt;/g;
	$shown =~ s/"/&quot;/g;

	print "<title>Cannot find archive</title>\n";
	print "</head><body>\n";
	print "<h1>Specified directory $shown is not an archive directory.</h1>\n";
	print "Configuration file was not found.\n";
	print "</body></html>\n";
	exit -1;
}

# Return the text between <title> and </title> in the given file.
#
#   $file  - path of the file to scan
#
# Returns the title text; a title spread over several lines is joined
# with single spaces.  Returns "" when the file name is empty, the file
# cannot be opened, or it contains no <title> tag (webglimpse then writes
# 'No title' itself).  If a <title> is never closed, everything up to the
# end of the file is returned.
sub lookup_title{
	my($file) = @_;
	my $title = "";
	my $intitle = 0;
	my $fh;

	return $title unless defined $file && $file ne "";

	# Three-argument open: the name is used literally, so mode characters
	# or surrounding whitespace in a request-supplied name are not
	# interpreted by open().
	return $title unless open($fh, "<", $file);

	# A lexical line variable is used so the caller's $_ is left alone.
	while (my $line = <$fh>) {
		chomp $line;
		if ($line =~ /\<title\>(.*)$/i) {
			$intitle = 1;
			$title = $1;
		} elsif ($intitle) {
			$title .= " $line";
		}
		# stop as soon as the closing tag has been seen (and cut it off)
		last if ($intitle && $title =~ s#</title>.*##i);
	}
	close($fh);

	return $title;
}

########################################################################
####  SITE CONFIGURATION FUNCTIONS ####
########################################################################

########################################################################
# Read the site configuration file named by the global $wgConfPath and
# fill in the globals $DirectoryIndex, $UserDir, $DocumentRoot, $Port,
# $Server and $ServerAddress.
#
#  - When a directive occurs more than once, the last occurrence wins.
#  - Alias and ScriptAlias lines are not needed by this script and are
#    ignored (so %Alias stays whatever it was before the call).
#  - $DirectoryIndex defaults to "index.html" when not configured.
#  - $ServerAddress receives the first address gethostbyname() returns
#    for $Server, or "" when no server is configured or the lookup fails.
#
# Dies with "Cannot read <path>." when the file cannot be opened.
sub siteconf_ReadConf   {
   my $conf;

   open($conf, "<", $wgConfPath) || die "Cannot read $wgConfPath.\n";

   # A lexical line variable is used so the caller's $_ is left alone.
   while (my $line = <$conf>)  {
      if ($line =~ /^DirectoryIndex[\s]*([\S]*)/i) {
         $DirectoryIndex = $1;
      } elsif ($line =~ /^UserDir[\s]*([\S]*)$/i)  {
         $UserDir = $1;
      } elsif ($line =~ /^DocumentRoot[\s]*([\S]*)$/i)   {
         $DocumentRoot = $1;
      } elsif ($line =~ /^Port[\s]*([\S]*)$/i)  {
         $Port = $1;
      } elsif ($line =~ /^Server[\s]*([\S]*)$/i)   {
         $Server = $1;
      }
   }
   close($conf);

   if ($DirectoryIndex eq "") {
      $DirectoryIndex = "index.html";
   }

   # Resolve the server name only when one was configured; element 4 of
   # the gethostbyname() result is the first address.
   $ServerAddress = "";
   if ($Server ne "") {
      my @hostent = gethostbyname($Server);
      $ServerAddress = $hostent[4] if defined $hostent[4];
   }
}

# Return the part of $path that lies below directory $dir, without leading
# slashes, or undef (empty list) when $path is not inside $dir.  The
# comparison is literal, not a pattern match, and respects path-component
# boundaries: "/var/www" does not contain "/var/www2/x".
sub siteconf_PathBelow {
   my($path, $dir) = @_;

   return if !defined $path || !defined $dir || $dir eq "";
   return if substr($path, 0, length($dir)) ne $dir;

   my $rest = substr($path, length($dir));
   return unless $dir =~ m{/$} || $rest eq "" || $rest =~ m{^/};

   $rest =~ s{^/+}{};
   return $rest;
}

# Translate a local file name into the URL under which this server
# publishes it.
#
#   $file  - local path of the file
#
# Uses the globals $Server, $Port, $DocumentRoot and %Alias (URL prefix =>
# local directory).  Files below $DocumentRoot map to http://server[:port]/...;
# otherwise the first matching %Alias entry is used (NOT the longest
# match).  The port is omitted when it is 80 or not configured.
# Returns "" when the file cannot be mapped.
sub siteconf_LocalFile2Url {
   my($file) = @_;
   my($rest);

   # $portPart is left as a package global, as it always has been, in case
   # other code reads it.  An empty $Port used to produce "http://host:/".
   if ($Port eq "" || $Port eq "80")   {
      $portPart = "";
   } else   {
      $portPart = ":$Port";
   }

   $rest = siteconf_PathBelow($file, $DocumentRoot);
   if (defined $rest)  {
      return "http://$Server$portPart/$rest";
   }

   #  We are NOT going for longest match.
   foreach my $alias (keys %Alias)  {
      $rest = siteconf_PathBelow($file, $Alias{$alias});
      if (defined $rest)   {
         return "http://$Server$portPart$alias/$rest";
      }
   }

   return "";
}

