#!/usr/bin/perl -w
#
# $Id: shadowtr.pl,v 1.3 2003/10/08 21:50:56 dbeckett2 Exp $
#
# Turn a pile of WDs in a working area into a consistent (I hope)
# set of WDs in another directory.  Requires a CVS-checked out
# area to start with (to determine which files to copy).
# Modifies the Overview.html to the correct new dates, hopefully.
#
# Dave Beckett, ILRT University of Bristol
#
# This is under the following W3C Software License
# http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
#

use strict;
use File::Copy;
use File::Path;

# Source tree.  This MUST be a checked out CVS area: each document is
# read from a subdirectory named 'status-shortname-fromdate', and the
# CVS/Entries file in there decides which files get copied.
my $from_top = $ENV{HOME} . '/w3ccvs/WWW/2001/sw/RDFCore/TR';

# Destination tree.  One subdirectory per document, named after the
# shortname (without the status prefix), is written below here and is
# created if it does not exist yet.
my $to_top = 'staging';

# Dry run switch: non-zero only reports what would be done,
# 0 actually creates directories and copies/writes files.
my $dryrun = 0;

# Verbosity: 1 reports progress through each document,
# 2 additionally shows every changed line before and after.
my $debug = 1;

# Dates in YYYYMMDD form; the human readable forms are derived later.
# Old document dates.  The source directories are named after the first;
# both are recognised as old dates inside the documents.
my $from_date  = '20030117';
my $from_date2 = '20030905';
# New publication date
my $to_date = '20031010';

# The status-shortname of every document to process.
# (An earlier run used: NOTE-lbase WD-rdf-concepts WD-rdf-mt
#  WD-rdf-schema WD-rdf-syntax-grammar WD-rdf-testcases)
my @status_shortnames = (
  'NOTE-lbase',
  'WD-rdf-mt',
  'WD-rdf-primer',
  'WD-rdf-schema',
  'WD-rdf-syntax-grammar',
  'WD-rdf-testcases',
  'WD-rdf-concepts',
);

# You should not need to look below here.

######################################################################
# Main

# All source paths used below are relative to the checkout, so work from
# there.  NOTE: a relative $to_top is consequently resolved against
# $from_top as well.
chdir $from_top or die "Cannot chdir $from_top - $! \n";

# Set of the status-shortnames handled in this run, for quick lookup
my(%our_name)=map {$_ => 1} @status_shortnames;

# Split a configured YYYYMMDD date into (year, month, day).  Dies with a
# message naming the setting when the value is malformed; otherwise the
# undefined parts would silently produce bogus rewrite rules below
# (e.g. an undefined month indexes the last month name).
my $split_date=sub {
  my($label,$date)=@_;
  my($year,$month,$day)=($date =~ /\A(\d\d\d\d)(\d\d)(\d\d)\z/)
    or die "Bad $label '$date' - expected YYYYMMDD\n";
  die "Bad $label '$date' - month must be 01-12\n"
    if $month < 1 || $month > 12;
  die "Bad $label '$date' - day must be 01-31\n"
    if $day < 1 || $day > 31;
  return ($year,$month,$day);
};

my($from_year,$from_month,$from_day)=$split_date->('$from_date', $from_date);
my($from_year2,$from_month2,$from_day2)=$split_date->('$from_date2', $from_date2);
my($to_year,$to_month,$to_day)=$split_date->('$to_date', $to_date);

my(@mnames)=qw(January February March April May June July August September October November December);
my $from_eng_date=sprintf("%02d %s %d",$from_day, $mnames[$from_month-1], $from_year);
my $from_eng_date2=sprintf("%02d %s %d",$from_day2, $mnames[$from_month2-1], $from_year2);
# Handle '5 Month' as well as '05 Month'
my $from_eng_date_a=sprintf("%d %s %d",$from_day, $mnames[$from_month-1], $from_year);
my $from_eng_date2_a=sprintf("%d %s %d",$from_day2, $mnames[$from_month2-1], $from_year2);
my $to_eng_date=sprintf("%02d %s %d",$to_day, $mnames[$to_month-1], $to_year);

# Perl source text (run later with string eval against each line in $_)
# holding, for every document and for each of the two old dates, one
# substitution that turns the old URL - either under the CVS area path
# or under /TR/<old year>/ - into the new /TR/<new year>/ URL.  The
# second capture keeps an optional trailing slash as it was.
my $shortnames_re=join(";",
  map {
    my $name=$_;
    join(" ",
      map {
        my($year,$date)=@$_;
        qq{s%http://www.w3.org/(2001/sw/RDFCore/TR|TR/$year)/$name-$date(/?)%http://www.w3.org/TR/$to_year/$name-$to_date\$2%g;}
      } ([$from_year,$from_date], [$from_year2,$from_date2])
    )." "
  } @status_shortnames);

# Likewise Perl source text, replacing the old human readable dates by
# the new one.  The unpadded day variants are only added when they
# differ from the zero-padded ones.
my $eng_date_re=qq{s%$from_eng_date%$to_eng_date%g; s%$from_eng_date2%$to_eng_date%g; };
$eng_date_re.=qq{s%$from_eng_date_a%$to_eng_date%g;}
  if $from_eng_date_a ne $from_eng_date;
$eng_date_re.=qq{s%$from_eng_date2_a%$to_eng_date%g;}
  if $from_eng_date2_a ne $from_eng_date2;


# Years that may appear in a /TR/<year>/ "this version" URL: those of the
# two old dates and of the new date (which the date check also accepts).
my $tr_year_alt=join('|', $from_year, $from_year2, $to_year);

# Apply the URL and the human readable date rewrite rules to the current
# line in $_.  Both rule sets are Perl source text, hence the string
# evals; a failure there means the generated rules are broken.
my $rewrite_line=sub {
  eval $shortnames_re;
  die "Debug this: '$shortnames_re'\n" if $@;

  eval $eng_date_re;
  die "Debug this: '$eng_date_re'\n" if $@;
};

# Warn when the current line in $_ refers to one of our documents with a
# date that is neither one of the old dates nor the new one.  $what is
# the message prefix ('Use' or 'References use').
my $warn_odd_date=sub {
  my($what)=@_;
  if(m%(http://www.w3.org/2001/sw/RDFCore/TR/(.+?)-(\d\d\d\d\d\d\d\d)/?)%
     ||
     m%(http://www.w3.org/TR/\d\d\d\d/(.+?)-(\d\d\d\d\d\d\d\d)/?)%) {
    my($url,$thisname,$thisdate)=($1,$2,$3);
    if(exists $our_name{$thisname}) {
      warn "    $.: $what of $thisname date $thisdate in $url\n"
        unless ($thisdate eq $from_date || $thisdate eq $from_date2
                || $thisdate eq $to_date);
    }
  }
};

# Process every document: copy its CVS-controlled files to the output
# directory and write a rewritten Overview.html next to them.
for my $status_name (@status_shortnames) {
  # The output directory is the shortname without its status prefix
  my $name=$status_name; $name =~ s/^[^-]+-//;

  my $from_dir=$status_name.'-'.$from_date;
  my $to_dir=$to_top.'/'.$name;

  warn "$status_name ($from_dir to $to_dir)\n";

  if(!-d $to_dir) {
    if($dryrun) {
      warn "  mkdir $to_dir\n";
    } else {
      mkpath([$to_dir], 0, 0755);
    }
  }

  # Copy every file listed in CVS/Entries except Overview.html, which is
  # rewritten below rather than copied.
  my $cvs_entries="$from_dir/CVS/Entries";
  open(my $cvs, '<', $cvs_entries) or die "Cannot open $cvs_entries - $!\n";
  my $seen_overview=0;
  while(my $entry=<$cvs>) {
    # Only lines of the form '/filename/...' name a file
    next unless $entry =~ m%^/([^/]+)/%;
    my $file=$1;

    if($file eq 'Overview.html') {
      $seen_overview=1;
      next;
    }

    if($dryrun) {
      warn "  copy \"$from_dir/$file\" to \"$to_dir/$file\"\n";
      next;
    }

    # Report a failed copy instead of silently leaving the file out;
    # carry on so the remaining files are still staged.
    copy("$from_dir/$file", "$to_dir/$file")
      or warn "  Cannot copy \"$from_dir/$file\" to \"$to_dir/$file\" - $!\n";
  }
  close($cvs);
  die "No Overview.html seen in $from_dir\n" 
    unless $seen_overview;


  my $from_file=$from_dir."/Overview.html";
  my $to_file=$to_dir."/Overview.html";


  warn "  convert $from_file to $to_file\n";

  open(my $in, '<', $from_file) or die "Cannot open $from_file - $!\n";

  # No output file at all on a dry run; the input is still scanned so
  # that all checks and warnings happen.
  my $out;
  unless($dryrun) {
    open($out, '>', $to_file) or die "Cannot create $to_file - $!\n";
  }

  # Position in the document:
  #   0 - before the "this version" URL
  #   1 - after it, looking for the end of the table of contents
  #   2 - body
  #   3 - references
  #   4 - changes/appendix; only the rewrites done for every line apply
  my $state=0;
  while(<$in>) {
    my $old=$_;
    s%Editors Working Draft\s+%W3C Working Draft %;

    # Dates in headings are updated wherever they occur
    if(m%<h\d%) {
      eval $eng_date_re;
      die "Debug this: '$eng_date_re'\n" if $@;
    }
  
    if($state == 0) {
      # This version
      if(m%http://www.w3.org/2001/sw/RDFCore/TR/(.+?)-(\d\d\d\d\d\d\d\d)/?% ||
         m%http://www.w3.org/TR/(?:$tr_year_alt)/(.+?)-(\d\d\d\d\d\d\d\d)/?% ) {
        my($thisname,$thisdate)=($1,$2);
        my $to_url="http://www.w3.org/TR/$to_year/$thisname-$to_date/";

        die "    $.: Bad this version name $thisname != $status_name\n" if $thisname ne $status_name;
        die "    $.: Bad this version name $thisdate != $from_date\n" 
          if ($thisdate ne $from_date && $thisdate ne $from_date2 && $thisdate ne $to_date);

        s%http://www.w3.org/2001/sw/RDFCore/TR/(.+?)-(\d\d\d\d\d\d\d\d)/?%$to_url%g;
        s%http://www.w3.org/TR/(?:$tr_year_alt)/(.+?)-(\d\d\d\d\d\d\d\d)/?%$to_url%g;
        $state=1;
      }
    } elsif($state == 1) {
      # After header, looking for end of toc
      if(m%<hr\s*/>%i) {
        $state=2;
        warn "    $.: Found end of toc\n"
          if $debug;
      }
    } elsif($state == 2) {
      # Body
      if(m%<h\d.*references%i || m%<a id=".*references.*?"%i) {
        $state=3;
        warn "    $.: Found references\n"
          if $debug;
      } else {
        $rewrite_line->();
      }

      # Checked for every body line, including the references heading
      $warn_odd_date->('Use');

    } elsif($state == 3) {
      # In references

      if(m%<h\d.*(change|append)%i || m%<a id=".*?changes.*?"%i) { 
        $state=4;
        warn "    $.: Found changes/changelog/appendix\n"
          if $debug;
      } else {
        $rewrite_line->();
        $warn_odd_date->('References use');
      }
    }

    # Fail loudly rather than leave a truncated Overview.html behind
    unless($dryrun) {
      print {$out} $_
        or die "Cannot write $to_file - $!\n";
    }

    warn "    $.: From\n${old}          To\n$_"
      if $debug >1 && $_ ne $old;
  }
  close($in);

  # Buffered write errors only surface when the file is closed
  unless($dryrun) {
    close($out) or die "Cannot close $to_file - $!\n";
  }
}
