#!/usr/local/bin/perl
# -*-Perl-*-
# lsl2html
# $Id: lsl2html,v 1.7 2003/10/01 07:22:16 connolly Exp $
# 
# based on
#    http://www.ececs.uc.edu/~kbse/lsl2html/
# by John Penix
# University of Cincinnati KBSE Lab  
# jpenix@ece.uc.edu
#
#
# Given Trait.lsl as an argument, the program creates the file
# Trait.html in the current directory.  There is also an option (-I) to
# create an index for a directory.
# 
# 
# Trait Names
# 
# The current version of the program uses a brute force method for
# determining trait names, as opposed to the syntax of the language.
# The directories in the LARCH_PATH environment variable are searched to
# find all visible trait names.  When one of these names is found in
# the file, it is turned into a hyperlink.
# 
# 
#  URLs
# 
# Each directory in LARCH_PATH has a URL associated with it.  There
# are three cases for determining the URL for a directory:
# 
# 1. The first line of the file .lsl2htmlURL in each LARCH_PATH 
#    directory is checked for a URL.  The URL should end in a slash,
#    without quotes:
# 
#        http://www.aaa.bbb/ddd/eee/fff/
#     or
#        file:/www.aaa.bbb/ddd/eee/fff/
#  
#  2. For the current directory ("." must be in LARCH_PATH),
#     relative referencing is used, i.e. the URL is just the filename.
#     This assumes that all of the HTML files created for the LSL files in
#     the current directory will also be kept together in one directory.
#  
#  3. If there is no .lsl2htmlURL file in the directory, then the URL
#     becomes file:DIRECTORY/, where DIRECTORY is the entry from
#     LARCH_PATH.
# 
# 
#  Examples
#  
#  % lsl2html Trait.lsl
#   
#    Trait.html is created in the current directory.
# 
#  % lsl2html *.lsl
# 
#    HTML file is created for every .lsl file in the current directory.
# 
#  % lsl2html -I
# 
#    Creates an index referencing all of the HTML files for all
#    of the .lsl files in the directory.  Notice that the .lsl files must
#    be present for a trait to be recognized.
# 
#  % lsl2html -I *.lsl
#    
#    Creates HTML files and an index for all .lsl files in the
#    directory.
# 



# keyword regular expression
# note: putting \ before forall causes problem with word boundary match
$keyword = "forall|trait|includes|assumes|introduces|asserts|implies|converts|partitioned by|generated by|for|if|then|else|tuple of|union of|exempting|equations";

# makepretty($text)
#
# Returns a copy of $text in which every LSL keyword (see $keyword) is
# wrapped in <b>...</b>.  The argument itself is not modified.
#
# $text is expected to be HTML-escaped already, so any "<...>" sequence
# in it is markup produced by this script (for example the anchor
# inserted for a trait name).  Such tags are copied through untouched;
# otherwise a keyword occurring inside an href, e.g. ".../for/Set.html",
# would be boldfaced and the tag corrupted.
sub makepretty {
    my ($text) = @_;

    # The alternation is tried left to right at each position: a whole
    # tag is consumed and re-emitted as is, anything else that matches
    # is a keyword and gets boldfaced.
    $text =~ s{(<[^>]*>)|\b($keyword)\b}{ defined $1 ? $1 : "<b>$2</b>" }ge;

    return $text;
}


# generateINDEX(@files)
#
# Writes index.html in the current directory: a bulleted list with one
# link per trait, pointing at TRAIT.html.  @files are .lsl file names;
# they are sorted and the ".lsl" suffix is removed to obtain the trait
# names.  The caller's list is left untouched (the sub works on a copy).
#
# If index.html cannot be opened for writing, the problem is reported
# through printError and no index is produced.
sub generateINDEX {
    my @traits = sort @_;

    my $index;
    unless (open($index, '>', 'index.html')) {
        printError("cannot write index.html: $!");
        return;
    }

    print {$index} "<head>\n<title>LSL Index</title>\n</head>\n";
    print {$index} "<body>\n";
    print {$index} "<h1>LSL Index</h1>\n";
    print {$index} "<ul>";
    foreach my $trait (@traits) {
        # the dot is escaped so that only a real ".lsl" suffix is removed
        $trait =~ s/\.lsl$//;
        print {$index} "<li><a href=\"$trait.html\">$trait</a></li>\n";
    }
    print {$index} "</ul>\n";
    # the credit line is page content, so it goes before </body>
    print {$index} printCredits();
    print {$index} "\n</body>";

    close($index) or printError("error while writing index.html: $!");
}



# printCredits()
#
# Returns the HTML credit footer when $creditsFLAG is true and the empty
# string otherwise.  Despite the name nothing is printed here; the
# caller decides where the returned text goes.
sub printCredits {
    return "" unless $creditsFLAG;
    return "\n<hr />\nHTML generated using <a href=\"http://www.w3.org/XML/9711theory/#lsl2html\">lsl2html</a>.";
}

# printError(@message)
#
# Writes an error report to STDERR: a blank line, the "lsl2html ERROR: "
# prefix, the message pieces run together, and a final newline.  The
# program is not terminated; callers exit themselves when appropriate.
sub printError {
    my $message = join('', @_);
    print STDERR "\nlsl2html ERROR: " . $message . "\n";
}
				



# Associative array to map trait names to URLs
%TraitURL = ();


##
## begin main program
##


# With no arguments at all there is nothing to do: show the usage
# message and stop instead of carrying on with an empty argument list.
if (@ARGV == 0) {
    print "\nUsage:\n  lsl2html Trait.lsl\n      results in Trait.html\n\n";
    exit 1;
}

# Flag defaults.  These remain package globals (no "my") because the
# subroutines defined earlier in the file read them by name.
$indexFLAG = 0;     # 0: no index link, 1: write index.html (-I), 2: link to $altindex (-i)
$creditsFLAG = 1;   # cleared by -c

$altindex = "";     # URL of an existing index, operand of -i

# A first argument beginning with "-" is a cluster of single-letter flags:
#   I      generate index.html and link every page to it
#   i URL  link every page to the index at URL (the next argument)
#   c      omit the credit line
if ($ARGV[0] =~ /^-/) {
    # Take the flag word off the argument list first, so that every
    # test below looks at the flags and never at a following operand.
    my $flags = shift(@ARGV);

    if ($flags =~ /I/) {
        $indexFLAG = 1;
    }

    if ($flags =~ /i/) {
        $indexFLAG = 2;
        $altindex = shift(@ARGV);
        unless (defined $altindex) {
            printError("the -i flag requires the URL of the index as its argument.");
            exit 1;
        }
    }

    if ($flags =~ /c/) {
        $creditsFLAG = 0;
    }
}


# Find the trait names, and the URL prefix to use for them, in every
# directory listed in the LARCH_PATH environment variable.  The results
# are left in two package globals used by the conversion loop:
#   %TraitURL   trait name => URL prefix of the directory that defines it
#   $TraitExpr  regular expression alternation matching any trait name
if (!(exists $ENV{'LARCH_PATH'})) {
    printError("LARCH_PATH environment variable must be defined.");
    exit 1;
}

# break path apart (at :s) into directories
my @dirs = split(/:/, $ENV{'LARCH_PATH'});

if (scalar @dirs == 0) {
    printError("LARCH_PATH appears to be empty.");
    exit 1;
}

foreach my $dir (@dirs) {
    # An unreadable or missing directory contributes no traits; say so
    # and move on rather than failing the whole run.
    unless (opendir(DIR, $dir)) {
        warn "lsl2html: skipping LARCH_PATH entry '$dir': $!\n";
        next;
    }
    my @contents = readdir(DIR);
    closedir(DIR);

    # the trait files are the entries with a literal ".lsl" suffix
    my @traits = grep { /\.lsl$/ } @contents;

    # set up the URL prefix for this directory
    my $url;
    if ($dir eq '.') {
        # current directory: relative links, i.e. no prefix at all
        $url = "";

        # make table of contents if necessary
        if ($indexFLAG == 1) {
            generateINDEX(@traits);
        }
    }
    elsif (grep { $_ eq '.lsl2htmlURL' } @contents) {
        # the first line of .lsl2htmlURL holds the URL for the directory
        if (open(my $urldata, '<', "$dir/.lsl2htmlURL")) {
            $url = <$urldata>;
            close($urldata);
        }
        else {
            warn "lsl2html: cannot read $dir/.lsl2htmlURL: $!\n";
        }
        # drop the newline and any other trailing whitespace
        $url =~ s/\s+$// if defined $url;
        # an unreadable or empty file falls back to a file: URL
        $url = "file:$dir/" unless defined $url && length $url;
    }
    else {
        $url = "file:$dir/";
    }

    # Map each trait to the URL of the first directory that provides it;
    # earlier LARCH_PATH entries take precedence over later ones.
    foreach my $file (@traits) {
        (my $trait = $file) =~ s/\.lsl$//;
        next unless length $trait;      # a file named just ".lsl"
        $TraitURL{$trait} = $url unless exists $TraitURL{$trait};
    }
}

# make the world's biggest regular expression
# Names are quoted so that regex metacharacters in a file name are taken
# literally, and longer names come first so that a name is preferred
# over any shorter name it begins with.
$TraitExpr = join("|",
                  map  { quotemeta($_) }
                  sort { length($b) <=> length($a) or $a cmp $b }
                  keys(%TraitURL));

# With no traits at all the alternation would be empty and would match
# at every word boundary; use a pattern that can never match instead.
$TraitExpr = '(?!)' if $TraitExpr eq "";


# LSL operator spellings and the character references that replace them
# (see http://www.w3.org/TR/MathML2/isotech.html).  The table is applied
# top to bottom to text whose <, > and & have already been escaped,
# which is why the arrows are spelled with &lt; and &gt;.  Order
# matters: "<=>" has to be tried before "=>".
my @symbols = (
    [ qr/\\in/,        '&#x2208;'     ],
    [ qr/&lt;=&gt;/,   '&#x021D4;'    ],
    [ qr/=&gt;/,       '&#x021D2;'    ],
    [ qr/-&gt;/,       '&#x02192;'    ],
    [ qr/\\rarr/,      '&#x021D2;'    ],
    [ qr/\\top/,       '&#x022A4;'    ],
    [ qr/\\bot/,       '&#x022A5;'    ],
    [ qr/\\E/,         '&#x02203;'    ],
    [ qr/\\A/,         '&#x02200;'    ],
    [ qr/\\neq/,       '&#x02260;'    ],
    [ qr/\\langle/,    '&#x02329;'    ],
    [ qr/\\rangle/,    '&#x0232A;'    ],
    [ qr/\\forall/,    '&#x02200;'    ],
    [ qr/==/,          '&#x0003D;'    ],
    [ qr/\\U/,         '&#x222A;'     ],
    [ qr/~/,           '&#x000AC;'    ],
    [ qr{/\\},         '&#x02227;'    ],
    [ qr{\\/},         '&#x02228;'    ],
    # <sup> is the HTML superscript element
    [ qr/\\superstar/, '<sup>*</sup>' ],
);

# Convert each remaining argument FILE.lsl into FILE.html.  A file that
# cannot be read, or whose output cannot be created, is reported and
# skipped; the other files are still processed.
foreach my $sourcefile (@ARGV) {

    # open source (.lsl) file
    my $source;
    unless (open($source, '<', $sourcefile)) {
        printError("cannot read $sourcefile: $!");
        next;
    }

    # open target (.html) file; only a real ".lsl" suffix is removed
    (my $traitname = $sourcefile) =~ s/\.lsl$//;
    my $targetfile = "$traitname.html";
    my $target;
    unless (open($target, '>', $targetfile)) {
        printError("cannot write $targetfile: $!");
        close($source);
        next;
    }

    # output html stuff
    print {$target} "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n";
    print {$target} "<title>$traitname</title>\n";
    print {$target} "</head>\n<body>\n";

    # read the source one line at a time into $_
    while (<$source>) {

        # Lines starting with the special comment symbol %html carry raw
        # HTML: drop the marker and pass the rest through unescaped.
        if (/^%html/) {
            s/%html//;
            print {$target} $_;
            next;
        }

        # replace HTML control characters: < > &
        s/&/&amp;/g;
        s/</&lt;/g;
        s/>/&gt;/g;

        if (/^\s*%/) {
            # comment lines are copied without further decoration
            print {$target} $_;
        }
        else {
            # Make leading blanks, runs of blanks and tabs non-breaking
            # so that the layout survives outside of a <pre> element.
            s/^( +)/"&#160;" x length($1)/ge;
            s/(  +)/"&#160;" x length($1)/ge;
            s/\t/"&#160;" x 8/ge;

            # operator spellings -> character references
            foreach my $symbol (@symbols) {
                my ($spelling, $replacement) = @$symbol;
                s/$spelling/$replacement/g;
            }

            # Turn trait names into links, except on the line where the
            # current trait is declared.  An empty or undefined
            # expression is skipped: it would match at every word
            # boundary and litter the line with empty anchors.
            if (defined $TraitExpr && length $TraitExpr && !/:\s*trait/) {
                s#\b($TraitExpr)\b#<a href="$TraitURL{$1}$1.html">$1</a>#g;
            }

            # boldface keywords
            print {$target} makepretty($_);
        }
        print {$target} "<br />";
    }

    # link to the index: the one created by this run (-I) or the
    # alternate one named on the command line (-i)
    if ($indexFLAG == 1) {
        print {$target} "<p>[<a href=\"index.html\">Index</a>]</p>\n";
    }
    elsif ($indexFLAG == 2) {
        # the attribute value is quoted, as XHTML requires
        print {$target} "<p>[<a href=\"$altindex\">Index</a>]</p>\n";
    }

    # print credit line
    print {$target} printCredits();

    # print end of html
    print {$target} "</body>\n</html>";

    close($source);
    close($target) or printError("error while writing $targetfile: $!");
}
