#!/usr/tools/bin/perl
# $Id: dtd2yacc.pl,v 1.1 1995/01/16 20:53:41 connolly Exp $

unshift(@INC, '/u/connolly/contrib/perlSGML/lib'); #@@

require 'dtd.pl';

&DTDread_dtd(STDIN);

&parseArgs;


print <<EOF;
%token TAGC  /* >  */
%token DATA  /* characters */

    /* attribute values, ala TEI guidelines */
%token NUMBER  /* [0-9]+ */
%token NAME    /* [a-z][a-z0-9.-]* */
%token NMTOKEN /* [a-z0-9.-]+ */
%token LITERAL /* anything, in quotes */

EOF

local(%start, %end, %attr, %val, %all);
foreach $elt (&DTDget_elements()){
    do tokensForElement($elt);
}
if($Array){
    local($s, $e, $a, $v);
    grep($all{$_} = 1, (keys %start,keys %end, keys %attr, keys %val));
    foreach (sort(keys %all)){
	$s = $start{$_} || -1;
	$e = $end{$_} || -1;
	$a = $attr{$_} || -1;
	$v = $val{$_} || -1;
	print ARRAY "\t{ \"$_\",\t$s,\t$e,\t$a,\t$v },\n";
    }
}

print <<EOF;

%%

EOF


foreach $root (&DTDget_top_elements()){
    print STDERR "@#root: $root\n";
    do productionsForElement($root);
}


print <<EOF;
data : DATA
    | data DATA
    ;

attrval_cdata : LITERAL | NMTOKEN | NAME | NUMBER ;
attrval_number : NUMBER
    | LITERAL /* @# check quoted stuff for NUMBER syntax */
    ;
attrval_numbers : NUMBER
    | LITERAL /* @# check quoted stuff for NUMBERS syntax */
    ;
attrval_name : NAME
    | LITERAL /* @# check quoted stuff for NAME syntax */
    ;
attrval_id : NAME
    | LITERAL /* @# check quoted stuff for NAME syntax; check uniqueness */
    ;
attrval_names : NAME
    | LITERAL /* @# check quoted stuff for NAMES syntax */
    ;

EOF


##############################


sub tokensForElement{
    local($elt) = @_;

    print "%token START_$elt\n";
    $start{$elt} = "START_$elt";

    if((&DTDget_base_children($elt))[0] ne 'EMPTY'){
	print "%token END_$elt\n";
	$end{$elt} = "END_$elt";
    }

    %attrs = &DTDget_elem_attr($elt);

    # filter out #FIXED attributes

    foreach $attr (sort keys %attrs){
	local($def, @vals) = split(/$;/, $attrs{$attr});

	next if $def eq '#FIXED';

	$attr =~ s/-/__/g; #@ yacc can't handle - in identifiers
	if(!$Token{"ANAME_$attr"}++){
	    print "%token ANAME_$attr\n";
	    $attr{$attr} = "ANAME_$attr";
	}

	foreach $val (@vals){
	    $val =~ tr/A-Z/a-z/; #@ why is perlSGML inconsistent?

	    next if $val eq 'cdata'
		|| $val eq 'number'
		    || $val eq 'numbers'
			|| $val eq 'names'
			    || $val eq 'id'
				|| $val eq 'name';


	    print "%token VNAME_$val\n" unless $Token{"VNAME_$val"}++;
	    $val{$val} = "VNAME_$val";
	}
    }

}


sub productionsForElement{
    local($elt) = @_;

    return if $Productions{$elt . $Context}++;

    print STDERR "@#... doing $elt $Context\n";

    local($anyattrs);
    local(@children) = &DTDget_base_children($elt);

    if($children[0] eq 'EMPTY'){
	print "elt_$elt$Context : start_$elt\n";
	$anyattrs = &attributes($elt);
	do startEnd($elt, 'EMPTY', $anyattrs);
	print "\n";

    }else{
	print "elt_$elt$Context : start_$elt content_$elt$Context end_$elt;\n";

	$anyattrs = &attributes($elt);
	do startEnd($elt, '', $anyattrs);

	local(@Exclusions) = @Exclusions;
	local(@Inclusions) = @Inclusions;
	local($Context) = $Context;

	local(@excl, @incl, $cont);
	@excl = &DTDget_exc_children($elt);
	@incl = &DTDget_inc_children($elt);
	$cont = $Context;

#	print STDERR "@# incl for $elt: @incl\n";
#	print STDERR "@# excl for $elt: @excl\n";

	@Exclusions = (@excl, @Exclusions) if $#excl >= 0;
	@Inclusions = (@incl, @Inclusions) if $#incl >= 0;

	if($#excl >= 0 || $#incl >= 0){
	    $Context = "_$elt" . "_" . $Context unless $Context =~ /_$elt[_]/;
	}

	do content($elt, $cont);

	print "\n";

	foreach $child (@children, @Inclusions){
#	    print STDERR "@#child: $child\n";
	    next if $child =~ /^\#/;

	    next if grep($_ eq $child, @Exclusions);

	    #@ bug in perlSGML?
	    next if $child eq 'cdata' || $child eq 'rcdata';

	    do productionsForElement($child);
	}
    }
}


sub attributes{
    local($elt) = @_;
    local(%attrs, @names);

    return if $StartEnd{$elt};

    %attrs = &DTDget_elem_attr($elt);

    # filter out #FIXED attributes

    foreach $attr (sort keys %attrs){
	local(@vals) = split(/$;/, $attrs{$attr});

	next if $vals[0] eq '#FIXED';

#	print STDERR "@#attr vals ($elt,$attr) = @vals\n";
	push(@names, $attr);
    }

    if(@names){
	print "attrs_$elt :\n";

	foreach $attr (@names){
	    local(@vals) = split(/$;/, $attrs{$attr});

	    $attr =~ s/-/__/g; #@ yacc can't handle - in identifiers

	    #@@ attributes aren't really repeatable like this...
	    print "\t| attrs_$elt attr_$elt" . "_$attr";

	    if($vals[0] eq '#REQUIRED'){
		print "  /* REQUIRED */\n";
	    }else{
		print "\n";
	    }
	}

	print "\t;\n\n";

	foreach $attr (@names){
	    do attribute($elt, $attr, split(/$;/, $attrs{$attr}));
	}
	    
    }

    return @names;
}

sub attribute{
    local($elt, $attr, $def, @values) = @_;

    $attr =~ s/-/__/g; #@ yacc can't handle - in identifiers

    local($lhs);
    $lhs = "attr_$elt" . "_$attr";

    if($values[0] eq 'CDATA'){
	print "$lhs : ANAME_$attr attrval_cdata;\n";
    }
    elsif($values[0] eq 'NUMBER'){
	print "$lhs : ANAME_$attr attrval_number;\n";
    }
    elsif($values[0] eq 'NAME'){
	print "$lhs : ANAME_$attr attrval_name;\n";
    }
    elsif($values[0] eq 'NAMES'){
	print "$lhs : ANAME_$attr attrval_names;\n";
    }
    elsif($values[0] eq 'ID'){
	print "$lhs : ANAME_$attr attrval_id;\n";
    }
    else{
	local($val);

	print "$lhs : ANAME_$attr LITERAL /*@# check: @values*/\n";
	foreach $val (@values){

	    $val =~ tr/A-Z/a-z/; #@ why is perlSGML inconsistent?
	
	    print "\t| ANAME_$attr VNAME_$val\n";
	    print "\t| VNAME_$val\n";
	}
	print "\t;\n";

    }
}


sub startEnd{
    local($elt, $empty, $anyattrs) = @_;

    return if $StartEnd{$elt}++;

    local($s, $e);

    ($s, $e) = $dtd'ElemTag{$elt} =~ /(\S)\s+(\S)/; #'

#    print STDERR "@# attrs for $elt: ", &DTDget_elem_attr($elt), "\n";

    if($anyattrs){
	print "start_$elt : START_$elt attrs_$elt TAGC\n";
    }else{
	print "start_$elt : START_$elt TAGC\n";
    }

    #@@ NET tags
    #@@ empty start tags

    if ($s eq 'O'){
	print "\t| /* omitted start tag */\n";
    }
    print "\t;\n";

    if($empty ne 'EMPTY'){
	print "end_$elt : END_$elt\n";

	#@@ empty end tags

	if ($e eq 'O'){
	    print "\t| /* omitted end tag */\n";
	}
	print "\t;\n";
    }

    print "\n";
}


sub content{
    local($elt, $context) = @_;

    #@@ IGNORING CONNECTIVES!!!

    local(@children) = &DTDget_base_children($elt);
    local($D) = 1;

    print "content_$elt$context : /* empty */\n";
    print "\t| content_$elt$context cont_$elt$context; /*@@assume repeated-or*/\n";
    print "cont_$elt$context : ";

    #@@ ANY, RCDATA
    if($children[0] eq 'cdata'){
	print "\t data\n";
    }else{
	local($sep);
	foreach $child (@children, @Inclusions){
	    next if grep($_ eq $child, @Exclusions);

	    if($child eq '#pcdata'){
		print "\t$sep data\n";
		$D = 0;
	    }else{
		print "\t$sep elt_$child$Context\n";
	    }
	    $sep = '|';
	}
	if ($D){
	    print "\t$sep DATA /* skip DATA in eltement content */\n";
	}
    }

    print "\t;\n";
}


sub parseArgs{
    while(($opt = $ARGV[0]) =~ /^-/){
	shift(@ARGV);
	if($opt eq '-array'){
	    $Array = shift(@ARGV);
	}
	else{
	    warn "$0: option $opt not recognized\n";
	}
    }

    if($Array){
	open(ARRAY, ">$Array") || die "$0: $Array: $!\n";
    }
}
