#!/usr/bin/perl
# $Id: facets-test.pl,v 1.16 2007/10/13 09:00:00 eric Exp $

# ./facets-test.pl facets.rs data-r2/bound data-rq/dir2
#   read facets from facets.rs and execute tests on each of the rq files in
#   data-r2/bound data-rq/dir2 .
# ./facets-test.pl "//SelectQuery[count(Var) > 1]" a.rq 
#   see if that XPath is in a.rq .


use strict;

use XML::XPath;
use XML::XPath::XMLParser;
use SPARQL;

@TextPresenter::ISA = qw(Presenter);
@SQLPresenter::ISA = qw(Presenter);
@HTMLPresenter::ISA = qw(Presenter);

# facetTuple indexes:
use constant Facet => 0;
use constant XPath => 1;
use constant Feature => 2;
use constant FeatureDesc => 3;

# testsByCount and prototypes index:
use constant Path => 0;
use constant Filepath => 1;
use constant Bits => 2;

my $presenterType = 
    ($ARGV[0] eq '-t' && shift @ARGV) ? 'TextPresenter' : 
    ($ARGV[0] eq '-s' && shift @ARGV) ? 'SQLPresenter' : 
    ($ARGV[0] eq '-h' && shift @ARGV) ? 'HTMLPresenter' : 
    'TextPresenter'; # default to Text
&main($presenterType, shift @ARGV, [@ARGV]);

sub main {
    my ($presenterType, $facetArg, $paths) = @_;
    my $facetTuples = &loadFacets($facetArg);

    # Create a presenter to handle all the serialization.
    my $presenter = $presenterType->new(-tuples => $facetTuples);
    my ($testsByCount) = &parseTests($facetTuples, $paths, $presenter);
    &summarizeResults($testsByCount, $facetTuples, $presenter);
    $presenter->finish;
}

sub loadFacets {
    my ($facetArg) = @_;
    my $facetTuples = [];
    if (-f $facetArg) {
	my $xp = XML::XPath->new(filename => $facetArg);
	$xp->set_namespace('rs', 'http://www.w3.org/2005/sparql-results#');
	my $results = $xp->find('/rs:sparql/rs:results/rs:result');

	foreach my $node ($results->get_nodelist) {
	    my $facet = $xp->find('rs:binding[@name="facet"]/rs:uri/text()', $node);
	    my $xpath = $xp->find('rs:binding[@name="xpath"]/rs:literal/text()', $node);
	    my $feature = $xp->find('rs:binding[@name="feature"]/rs:uri/text()', $node);
	    my $featureDesc = $xp->find('rs:binding[@name="featureDesc"]/rs:literal/text()', $node);
 #    use Data::Dumper; print Dumper($xpath);
	    push (@$facetTuples, [substr($facet, 50), $xpath, substr($feature, 50), $featureDesc]); # Facet XPath Feature FeatureDesc
	    # printf("scanning facet %d(%s): %s\n", (scalar @$facetTuples)-1, $facet, $xpath);
	}
    } else {
	push (@$facetTuples, [$facetArg, split(/(?=\/\/)/, $facetArg, 2)]); # Facet XPath
    }
    return $facetTuples;
}

sub parseTests {
    my ($facetTuples, $paths, $presenter) = @_;

    my $testsByCount = {};
    my $maxWidth = 0;
    foreach my $path (@$paths) {
	$presenter->scanPath($path) if ($path ne '-');

	my @files;
	my $pleasePrint = 0;
	if (-d $path) {

	    # $path is a directory; get $path/*.rq
	    $path =~ s/\/$//;
	    opendir(DIR, $path) || die "Couldn't opendir($path): $!";
	    @files = map {"$path/$_"} sort grep {/\.rq$/} readdir(DIR);
	    closedir(DIR);
	} elsif (-f $path || $path eq '-') {

	    # $path is a file
	    @files = ($path);
	    $path = '';
	    $pleasePrint = 1;
	} else {
	    die "can't find \"$path\"";
	}

	foreach my $filepath (@files) {
 	    if (my ($facetCount, $testBits) = 
		&testFacets($filepath, $facetTuples, $pleasePrint, $presenter)) {

		# For presentation purproses, note widest $filepath
		if ((length $filepath) > $maxWidth) {
		    $maxWidth = length $filepath;
		}

		# Store test info binned by number of facets.
		push (@{$testsByCount->{$facetCount}}, 
		      [$path, $filepath, $testBits]);	# Path, Filepath, Bits
	    } else {
		# printf("skipping $filepath\n");
	    }
	}
    }

    # Tell presenter the path width
    $presenter->setPathWidth($maxWidth);
    return ($testsByCount);
}

use constant SPARQL_NS => 'http://www.w3.org/2005/01/yacker/uploads/SPARQL/';
use constant Yacker_NS => 'http://www.w3.org/2005/01/yacker/';
sub testFacets {
    my ($filepath, $facetTuples, $pleasePrint, $presenter) = @_;

    # Read file into $rq.
    my $rq;
    local $/;
    $/ = undef;
    if ($filepath eq '-') {
	print "enter query string followed by a ^d:\n";
	$rq = <>;
    } else {
	open (RQ, $filepath) || die "Couldn't open \"$filepath\": $!";
	$rq = <RQ>;
	close (RQ);
    }
    &utf8::decode($rq);

    # Parse $rq as a SPARQL query as serialize in XML form.
    my $rqAsXML;
    eval {
	my $root = new SPARQL()->parse($rq)->[0];
	$rqAsXML = $root->toXML('', {'' => SPARQL_NS, 'yacker' => Yacker_NS});
	&utf8::encode($rqAsXML);
    }; if ($@) {
	$@ =~ s/\n/\\n/g;
	print STDERR sprintf("error in $filepath: $@\n");
	return ();
    }

    # Pass XML form to XPath engine.
    my $xp = XML::XPath->new(xml => $rqAsXML);
    # facets.ttl counts on default namespace for SPARQL.
    $xp->set_namespace('', SPARQL_NS);
    $xp->set_namespace('yacker', Yacker_NS);

    my $facetCount = 0;
    my $testBits = undef;
    for (my $i = 0; $i < @$facetTuples; $i++) {
	# Try each XPath on this document.
	eval {
	    my $nodeset = $xp->find($facetTuples->[$i][XPath]);
	    if (my $count = $nodeset->size) {
		# Matched at least once.
		vec($testBits, $i, 1) = 1;
		$facetCount++;
		if ($pleasePrint) {
		    $presenter->settingBit($filepath, 
					   $facetTuples->[$i][Facet], $count);
		}
	    } else {
		vec($testBits, $i, 1) = 0;
	    }
	}; if ($@) {
	    $presenter->skipFacet($i, $facetTuples->[$i][XPath], $@);
	}
    }
    return ($facetCount, $testBits);
}

# bits2filepath index:
use constant Filepaths => 0;
use constant Introduced => 1;

sub summarizeResults {
    my ($testsByCount, $facetTuples, $presenter) = @_;

    my $prototypes = [];	# Prototypical query for any given facet.
    my $bits2filepath = {};	# Index of tests with a given facet set.
    my $facetCoverage = undef;	# Bitfield of total coverage by the test set.
    vec($facetCoverage, @$facetTuples, 1) = 0; # Set width of bitfield.

    # Test results are binned by number of facets hit by test.
    # Start with the tests that target the fewest facets.
    foreach my $totalFacets (sort {$a <=> $b} keys %$testsByCount) {
	foreach my $test (@{$testsByCount->{$totalFacets}}) {
	    my ($path, $filepath, $testBits) = @$test;

	    my $introducedFacets;	# How many new facets this test hits.
	    vec($introducedFacets, @$facetTuples, 1) = 0;
	    for (my $i = 0; $i < @$facetTuples; $i++) {
		if (vec($testBits, $i, 1) && !vec($facetCoverage, $i, 1)) {
		    # Exciting new test...
		    $prototypes->[$i] = [$path, $filepath, $testBits]; # Path, Filepath, Bits
		    vec($facetCoverage, $i, 1) = 1;
		    vec($introducedFacets, $i, 1) = 1;
		}
	    }

	    if ($bits2filepath->{$testBits}) {
		# Another test already hit these facets.
		push (@{$bits2filepath->{$testBits}[Filepaths]}, $filepath);
	    } else {
		# First test with this exact set of facets.
		$bits2filepath->{$testBits} = [[$filepath], $introducedFacets]; # Filepaths. Introduced
	    }

	    $presenter->showBits($filepath, $introducedFacets, $totalFacets, $testBits, $prototypes);
	}
    }
    &reportMissingFacets($facetTuples, $facetCoverage, $presenter, $prototypes);

    # $bits2filepath has a lists of identical tests. The first is *special*.
    foreach my $testBits (sort keys %$bits2filepath) {
	if (@{$bits2filepath->{$testBits}[Filepaths]} > 1 || 
	    !$bits2filepath->{$testBits}[Introduced]) {
	    $presenter->showShadows($testBits, $prototypes, 
				    $bits2filepath->{$testBits}[Filepaths], 
				    $bits2filepath->{$testBits}[Introduced]);
	}
    }
}

sub reportMissingFacets {
    my ($facetTuples, $facetCoverage, $presenter, $prototypes) = @_;

    my $missing = [];	# missed facets
    my $ones = undef;	# 111111... once for each facet.

    # Make list of missing facets.
    for (my $i = 0; $i < @$facetTuples; $i++) {
	if (!vec($facetCoverage, $i, 1)) {
	    push (@$missing, $facetTuples->[$i][Facet]);
	}
	vec($ones, $i, 1) = 1;
    }
    $presenter->showMissingFacets($facetCoverage, $ones, $missing, $prototypes);
}


package Presenter;
sub new {
    my ($proto, @parms) = @_;
    my $class = ref($proto) || $proto;
    my $self = {@parms};
    bless ($self, $class);
    return $self;
}
sub setPathWidth {
    my ($self, $pathWidth) = @_;
    $self->{-pathWidth} = $pathWidth;
}

package TextPresenter;
# testsByCount and prototypes index:
use constant Path => 0;
use constant Filepath => 1;
use constant Bits => 2;

# bits2filepath index:
use constant Filepaths => 0;
use constant Introduced => 1;
sub scanPath {
    my ($self, $path) = @_;
    printf("... %s\n", $path);
}
sub settingBit {
    my ($self, $filepath, $facet, $count) = @_;
    printf("%s: %s:%d\n", $filepath, $facet, $count);
}
sub skipFacet {
    my ($self, $i, $xpath, $error) = @_;
    print STDERR sprintf("skipping facet %d(%s): %s\n", $i, $xpath, $error);
}
sub showBits {
    my ($self, $filepath, $introducedFacets, $totalFacets, $testBits, $prototypes) = @_;
    printf("%$self->{-pathWidth}s: %03d/%03d:%s\n", $filepath, 
	   (scalar grep {$_ != 0} split(//, unpack("b*", $introducedFacets))), 
	   $totalFacets, unpack("b*", $testBits));
}
sub showMissingFacets {
    my ($self, $facetCoverage, $ones, $missing, $prototypes) = @_;
    printf("%$self->{-pathWidth}s: %03d/%03d:%s\n%d facets not covered: %s\n", 
	   "COVERAGE", @{$self->{-tuples}} - @$missing, scalar @{$self->{-tuples}}, unpack("b*", $facetCoverage ^ $ones), 
	   scalar @$missing, join(', ', @$missing));
}
sub showShadows {
    my ($self, $testBits, $prototypes, $list, $introduced) = @_;
#    printf("%s shadows: %s\n", $list->[Filepaths][0], join(', ', @{$list->[Filepaths]}[1..@$list-1]));
    printf("shadows: %s\n", join(', ', @$list));
    if (!$introduced) {
	printf("%$self->{-pathWidth}s: %03d/%03d:%s\n", 
	       "--", 0, scalar @{$self->{-tuples}}, unpack("b*", $testBits));
	for (my $i = 0; $i < @{$self->{-tuples}}; $i++) {
	    if (vec($testBits, $i, 1)) {
		printf("%$self->{-pathWidth}s: %03d/%03d:%s\n", 
		       $prototypes->[$i][Filepaths], $i, 
		       scalar @{$self->{-tuples}}, 
		       unpack("b*", $prototypes->[$i][Bits]));
	    }
	}
    }
}
sub finish {}

package SQLPresenter;
# facetTuple indexes:
use constant Facet => 0;
use constant XPath => 1;
use constant Feature => 2;
use constant FeatureDesc => 3;

# testsByCount and prototypes index:
use constant Path => 0;
use constant Filepath => 1;
use constant Bits => 2;

# bits2filepath index:
use constant Filepaths => 0;
use constant Introduced => 1;
sub scanPath {
    my ($self, $path) = @_;
    print STDERR sprintf("... %s\n", $path);
}
sub settingBit {
    my ($self, $filepath, $facet, $count) = @_;
    print STDERR sprintf("%s: %s:%d\n", $filepath, $facet, $count);
}
sub skipFacet {
    my ($self, $i, $xpath, $error) = @_;
    print STDERR sprintf("skipping facet %d(%s): %s\n", $i, $xpath, $error);
}
sub showBits {
    my ($self, $filepath, $introducedFacets, $totalFacets, $testBits, $prototypes) = @_;
    if (!$self->{-tablesCreated}) {
	# Create feature table.
	my (%features) = map {$_->[Feature] => $_->[FeatureDesc]} @{$self->{-tuples}};
	printf("CREATE TABLE feature (pk int unsigned not null  auto_increment primary key, name VARCHAR(127), description VARCHAR(255), UNIQUE u_name(name));\n");
	printf("INSERT INTO feature (name, description) VALUES %s;\n", join(', ', map {"('$_', '$features{$_}')"} sort {$a cmp $b} keys %features));

	# Create facet table.
	printf("CREATE TABLE facet (pk int unsigned not null  auto_increment primary key, name VARCHAR(127), example VARCHAR(255), xpath VARCHAR(127), feature int unsigned not null, UNIQUE u_name(name));\n");
	printf("INSERT INTO facet (name, feature) VALUES %s;\n", join(', ', map {"('$_->[Facet]', (SELECT pk FROM feature WHERE name='$_->[Feature]'))"} @{$self->{-tuples}}));

	# Create query table.
	printf("CREATE TABLE query (pk int unsigned not null  auto_increment primary key, filepath VARCHAR(127), UNIQUE u_filepath(filepath));\n");
	printf("CREATE TABLE queryFacet (pk int unsigned not null  auto_increment primary key, query int unsigned not null, facet int unsigned not null, canonical BOOLEAN, UNIQUE u_query_facet(query, facet));\n");

	# Tables we'll need later:
	# implementation -- tools that have implemented SPARQL
	printf("CREATE TABLE implementation (pk INT UNSIGNED NOT NULL  AUTO_INCREMENT PRIMARY KEY, projectName VARCHAR(127), releaseName VARCHAR(127), releaseDate DATETIME, reporterName VARCHAR(127), reporterPage VARCHAR(127), localFile VARCHAR(127), UNIQUE  u_release(projectName, releaseDate), UNIQUE u_file(localFile));\n");

	# test -- associate a test with it's query.
	printf("CREATE TABLE test (pk INT UNSIGNED NOT NULL  AUTO_INCREMENT PRIMARY KEY, name VARCHAR(127), query INT UNSIGNED NOT NULL, approval BOOLEAN, requires BOOLEAN, UNIQUE  u_name(name));\n");

	# report -- earl assertions of passing/failing tests.
	printf("CREATE TABLE report (pk INT UNSIGNED NOT NULL  AUTO_INCREMENT PRIMARY KEY, implementation INT UNSIGNED NOT NULL, test INT UNSIGNED NOT NULL, pass BOOLEAN, UNIQUE  u_pair(implementation, test));\n");

	$self->{-tablesCreated} = 1;
    }
    printf("INSERT INTO query (filepath) VALUES ('%s');\n", $filepath);
    for (my $iTuple = 0; $iTuple < @{$self->{-tuples}}; $iTuple++) {
	if (vec($testBits, $iTuple, 1)) {
	    my $introduced = vec($introducedFacets, $iTuple, 1) == 1;
	    printf("
INSERT INTO queryFacet (query, facet, canonical)
                VALUES ((SELECT pk FROM query WHERE filepath='%s'),
                        (SELECT pk FROM facet WHERE name='%s'),
                        %s);\n", $filepath, $self->{-tuples}[$iTuple]->[Facet], $introduced ? 'TRUE' : 'FALSE');
	}
    }
}
sub showMissingFacets {
    my ($self, $facetCoverage, $ones, $missing, $prototypes) = @_;
    return;
    printf("INSERT INTO tests (%s) %$self->{-pathWidth}s: %03d/%03d:%s\n%d facets not covered: %s\n", 
	   @{$self->{-tuples}} - @$missing, scalar @{$self->{-tuples}}, unpack("b*", $facetCoverage ^ $ones), 
	   scalar @$missing, join(', ', @$missing));
}
sub showShadows {
    my ($self, $testBits, $prototypes, $list, $introduced) = @_;
    return;
    printf("shadows: %s\n", join(', ', @$list));
    if (!$introduced) {
	printf("%$self->{-pathWidth}s: %03d/%03d:%s\n", 
	       "--", 0, scalar @{$self->{-tuples}}, unpack("b*", $testBits));
	for (my $i = 0; $i < @{$self->{-tuples}}; $i++) {
	    if (vec($testBits, $i, 1)) {
		printf("%$self->{-pathWidth}s: %03d/%03d:%s\n", 
		       $prototypes->[$i][Filepaths], $i, 
		       scalar @{$self->{-tuples}}, 
		       unpack("b*", $prototypes->[$i][Bits]));
	    }
	}
    }
}
sub finish {}

package HTMLPresenter;
use constant Facet => 0;
use constant XPath => 1;
use constant Test => 2;

# testsByCount and prototypes index:
use constant Path => 0;
use constant Filepath => 1;
use constant Bits => 2;

# bits2filepath index:
use constant Filepaths => 0;
use constant Introduced => 1;

sub new {
    my ($proto, @parms) = @_;
    my $class = ref($proto) || $proto;
    my $self = $class->SUPER::new(@parms);
    $self->{-anchors} = {};
    printf "<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">
<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">
  <head>
    <title>Facets Coverage Report</title>
    <style type=\"text/css\">
/*<![CDATA[*/
pre.data	{ border: thin solid #88AA88;
                  background-color: #E8F0E8;
                  margin: 1em 4em 1em 0em ; }
.new		{ background-color: #7f7 ; }
/*]]>*/
</style>
  </head>
  <body>
    <h1>Facets Coverage Report</h1>
    <p>The document explores the coverage of SPARQL tests with respect to the ".
    "facets described in <a href=\"facets\">facets</a>. Each facet is assigned".
    " a bit corresponding to its test:order number. The links in the following".
    " rediculous block of bits link to the facet. Those in the <a href=\"".
    "#coverage\">COVERAGE</a> row point to the test that most uniquely tests ".
    "this facet.</p>
    <pre>
";
    return $self
}
sub scanPath {
    my ($self, $path) = @_;
    print STDERR "... $path\n";
}
sub settingBit {
    my ($self, $filepath, $facet, $count) = @_;
    print STDERR sprintf("%s matched %s %d time(s)\n", $filepath, $facet, $count);
}
sub skipFacet {
    my ($self, $i, $xpath, $error) = @_;
    print STDERR sprintf("skipping facet %d(%s): %s\n", $i, $xpath, $error);
}
sub _bitText {
    my ($self, $bits, $prototypes, $introducedFacets) = @_;
    my @bitText;
    for (my $i = 0; $i < @{$self->{-tuples}}; $i++) {
	my $bit = vec($bits, $i, 1);

	my $href = $prototypes->[$i][Filepath] ? 
	    "$prototypes->[$i][Filepath]" : 
	    "facets#$self->{-tuples}[$i][Facet]";

	my $style = vec($introducedFacets, $i, 1) ? ' class="new"' : '';

	my $title = $self->{-tuples}[$i][Facet];
	push (@bitText, "<a href=\"$href\" title=\"$title\"$style>$bit</a>");
    }
    return join('', @bitText);
}
sub _fixedWidthHref {
    my ($self, $filepath, $href, $anchorStr) = @_;
    return sprintf("%s<a href=\"%s\"%s>%s</a>", 
		   ' ' x ($self->{-pathWidth} - length $filepath), 
		   $href, $anchorStr, $filepath);
}
sub showBits {
    my ($self, $filepath, $introducedFacets, $totalFacets, $testBits, $prototypes) = @_;
    my $anchorStr = '';

    # Can call showBits(foo) and only the first will create an anchor.
    if (!$self->{-anchors}{$filepath}) {
	$self->{-anchors}{$filepath} = $filepath;
	$self->{-anchors}{$filepath} =~ s/_/__/g;
	$self->{-anchors}{$filepath} =~ s/\//_/g;
	$anchorStr = sprintf(" id=\"%s\"", $self->{-anchors}{$filepath});
    }

    printf("%s: %03d/%03d:%s\n", 
	   $self->_fixedWidthHref($filepath, $filepath, $anchorStr), 
	   (scalar grep {$_ != 0} split(//, unpack("b*", $introducedFacets))), 
	   $totalFacets, $self->_bitText($testBits, undef, $introducedFacets)); # $prototypes));
}
sub showMissingFacets {
    my ($self, $facetCoverage, $ones, $missing, $prototypes) = @_;
    printf("%s: %03d/%03d:<span class=\"coverage\">%s</span>
%d facets not covered:</pre>
    <ul>\n%s    </ul>\n", 
	   $self->_fixedWidthHref("COVERAGE", "#coverage", ' id="coverage"'), 
	   @{$self->{-tuples}} - @$missing, scalar @{$self->{-tuples}}, 
	   $self->_bitText($facetCoverage, $prototypes, undef), 
	   scalar @$missing, 
	   join('', map {
	       "      <li><a href=\"facets#$_\">$_</a></li>\n"
		} @$missing));
}
sub showShadows999 {
    my ($self, $list) = @_;
    printf("    <p><a href=\"#%s\">%s</a> shadows:</p>\n    <ul>\n%s    </ul>\n", 
	   $self->{-anchors}{$list->[0]}, $list->[0], 
	   join('', map {
	       "      <li><a href=\"#$self->{-anchors}{$_}\">$_</a></li>\n"
		} @$list[1..@$list-1]));
}
sub showShadows {
    my ($self, $testBits, $prototypes, $list, $introduced) = @_;
#    printf("%s shadows: %s\n", $list->[Filepaths][0], join(', ', @{$list->[Filepaths]}[1..@$list-1]));
    printf("      <p>shadows: %s</p>\n", join(', ', map {"<a href=\"$_\">$_</a>"} @$list));
    if (!$introduced) {
	printf("      <pre>%$self->{-pathWidth}s: %03d/%03d:%s\n", 
	       "--", 0, scalar @{$self->{-tuples}}, $self->_bitText($testBits, undef, undef));
	for (my $i = 0; $i < @{$self->{-tuples}}; $i++) {
	    if (vec($testBits, $i, 1)) {
		# Render the test that already set covered this facet.

		# Show which facet it is.
		my $onebit = undef;
		vec($onebit, @{$self->{-tuples}}, 1) = 0;
		vec($onebit, $i, 1) = 1;

		my $filepath = $prototypes->[$i][Filepath];
		my $title = $self->{-tuples}[$i][Facet];
		my $bitStr = substr(unpack("b*", $onebit), 0, @{$self->{-tuples}});
		$bitStr =~ s/0/ /g;
		$bitStr =~ s/1/<a href="facets#$title" title="$title">1<\/a>/g;

		printf("%s: %03d/%03d:%s\n", 
		       $self->_fixedWidthHref($filepath, $filepath, ''), 
		       $i, scalar @{$self->{-tuples}}, $bitStr);
	    }
	}
	print "</pre>\n";
    }
}
sub finish {
    printf "
  </body>
</html>
"
}

