#!/usr/local/bin/perl -w
# $Id: detab.pl,v 1.6 2001/01/23 22:08:12 ted Exp $
# detab: quick hack to strip table tags out of pages for WAI.
# Called as "uri,detab" on this server; the request is transparently proxied so relative URIs stay relative.
#RewriteRule ^(.+),detab$        http://cgi.w3.org/cgi-bin/tablin.pl?uri=http://%{HTTP_HOST}$1 [P,L]

use strict;

# Environment handed to the lynx child process: a terminal type and a
# scratch directory.
$ENV{TERM}            = "vt100";
$ENV{LYNX_TEMP_SPACE} = "/tmp";
my $lynx = '/usr/local/bin/lynx';

print "Content-type: text/html\n\n";

# Decode the query string into %FORM.  Pairs are separated by '&' or ';'.
# The split on '=' is limited to two fields so that a value which itself
# contains '=' (e.g. uri=http://host/page?a=b) is kept whole.
my %FORM;
my $query = defined $ENV{QUERY_STRING} ? $ENV{QUERY_STRING} : '';
foreach my $pair ( split /[&;]/, $query ) {
    my ( $name, $value ) = split /=/, $pair, 2;
    next unless defined $name && length $name;
    $value = '' unless defined $value;
    $value =~ tr/+/ /;
    $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    $FORM{$name} = $value;
}
my $uri = $FORM{uri};

# No query string at all, or a query string without a usable uri parameter.
unless ( defined $uri && length $uri ) {
    print "<html><body>no uri, this was accessed improperly</body></html>";
    exit;
}

# The uri comes straight from the request, so it is untrusted.  Insist on
# an absolute http(s) uri without whitespace: this keeps a value starting
# with '-' from being read as a lynx option and keeps local paths out.
# The uri is deliberately not echoed back in the error page.
unless ( $uri =~ m{\Ahttps?://\S+\z}i ) {
    print "<html><body>unsupported uri, only http and https uris are fetched</body></html>";
    exit;
}

# List-form pipe open: lynx is exec'd directly with $uri as a single
# argument, so no shell ever sees (or interprets) the request data.
open( my $fh, '-|', $lynx, '-source', '-validate', $uri ) or do {
    print "<html><body>unable to retrieve the requested uri</body></html>";
    exit;
};

# Slurp the whole document into one buffer.  Because [^>]* below also
# matches newlines, table tags that span several lines are handled too.
#perhaps something more efficient than mashing into one huge buffer
my $page = do { local $/; <$fh> };
close($fh);
$page = '' unless defined $page;    # lynx produced no output

# Drop opening and closing table-related tags, keeping their content.
# The \b stops prefixes such as "tr" or "th" from matching other tags
# (e.g. <track>).
$page =~ s{</?(?:td|tr|table|tbody|thead|tfoot|th|caption|colgroup|col)\b[^>]*>}{}ig;

# Remove every ",detab" suffix found in the fetched page.
$page =~ s/,detab//ig;

# our full_uri#frag may be wrapped in ' or " and if there's one the other can appear within the uri or frag
# we might just be relative as only a frag too

#couldn't get back ref within a [^] :-( so i split into the 3 choices explicitly - wanted to keep 1-liner
# if you're reading this and see how to make it work, let me know ted@w3.org
# NOTE(review): a backreference is not interpolated inside a bracketed
# character class; if these are ever re-enabled, $uri should also be
# wrapped in \Q...\E so its metacharacters are matched literally.
#$page =~ s!(href *= *(\'|\")?($uri)?\#[^\2]+)!\1,detab!ig;

#$page =~ s!(href *= *\'($uri)?\#[^\']+)!\1,detab!ig;
#$page =~ s!(href *= *\"($uri)?\#[^\"]+)!\1,detab!ig;
#$page =~ s!(href *= *($uri)?\#[^> ]+)!\1,detab!ig;
print $page;





